From 8f6f9833b3e40b04e2cec7030af23ff7d402a402 Mon Sep 17 00:00:00 2001
From: Tom Benson <30674819+benson31@users.noreply.github.com>
Date: Tue, 7 Sep 2021 14:32:53 -0400
Subject: [PATCH 01/37] Entrywise operators (#1955)

* updates and cleanup to clamp operator

* unary/binary math operators; cereal registration; unit tests for abs, add, subtract, multiply, cos, sin

* Add more operators: activations and associated infrastructure.

* remove old layers; convert entrywise loss layers to operators

* add python classes for all the new operators

* Remove complex numbers from the abs test when running under ROCm

* Tiny bit of tidying

* quick factoring to use the common macro
---
 include/lbann/layers/loss/entrywise.hpp       |  76 ---
 include/lbann/layers/math/CMakeLists.txt      |   2 -
 include/lbann/layers/math/binary.hpp          | 142 -----
 include/lbann/layers/math/math_builders.hpp   |  49 --
 include/lbann/layers/math/unary.hpp           | 142 -----
 include/lbann/layers/operator_layer_impl.hpp  |   3 +-
 include/lbann/lbann.hpp                       |   5 +-
 include/lbann/operators/CMakeLists.txt        |   1 +
 .../operators/activations/CMakeLists.txt      |   7 +
 .../activations/activation_builders.hpp       |  23 +-
 .../activations/activation_builders_impl.hpp  |  36 ++
 .../activations/activations.hpp               |  45 +-
 include/lbann/operators/builder_macros.hpp    |  88 +++
 .../lbann/operators/declare_stateless_op.hpp  | 125 ++++
 .../lbann/operators/elementwise_operator.hpp  |   8 +-
 include/lbann/operators/loss/CMakeLists.txt   |   9 +
 .../lbann/operators/loss/entrywise.hpp        |  34 +-
 .../lbann/operators/loss/loss_builders.hpp    |  28 +-
 .../operators/loss/loss_builders_impl.hpp     |  29 +-
 include/lbann/operators/math/CMakeLists.txt   |   3 +
 include/lbann/operators/math/abs.hpp          | 129 ++++
 include/lbann/operators/math/binary.hpp       |  63 ++
 include/lbann/operators/math/clamp.hpp        |  21 +-
 .../lbann/operators/math/math_builders.hpp    |  67 ++-
 .../operators/math/math_builders_impl.hpp     |  78 ++-
 include/lbann/operators/math/unary.hpp        |  83 +++
 include/lbann/operators/operator.hpp          |  38 +-
 include/lbann/proto/datatype_helpers.hpp      |  12 +
 include/lbann/proto/operator_factory_impl.hpp |  72 ++-
 include/lbann/utils/exception.hpp             |   6 +
 include/lbann/utils/tensor.hpp                |   6 +-
 python/lbann/core/operators.py                | 531 ++++++++++++++++
 .../ltfb/mutation_strategy.cpp                |  20 +-
 src/layers/activations/CMakeLists.txt         |   2 -
 src/layers/activations/activations.cpp        | 168 ------
 src/layers/activations/activations.cu         | 173 ------
 .../cereal_registration/CMakeLists.txt        |   5 -
 .../activations/cereal_registration/selu.cpp  |  44 --
 .../cereal_registration/sigmoid.cpp           |  44 --
 src/layers/loss/CMakeLists.txt                |   2 -
 .../loss/cereal_registration/CMakeLists.txt   |   5 -
 src/layers/loss/entrywise.cpp                 | 255 --------
 src/layers/math/CMakeLists.txt                |  10 -
 src/layers/math/binary.cpp                    | 512 ----------------
 src/layers/math/binary.cu                     | 545 -----------------
 .../math/cereal_registration/CMakeLists.txt   |  49 --
 src/layers/math/cereal_registration/add.cpp   |  44 --
 src/layers/math/cereal_registration/asin.cpp  |  44 --
 src/layers/math/cereal_registration/asinh.cpp |  44 --
 src/layers/math/cereal_registration/atan.cpp  |  44 --
 src/layers/math/cereal_registration/atanh.cpp |  44 --
 src/layers/math/cereal_registration/ceil.cpp  |  44 --
 src/layers/math/cereal_registration/cosh.cpp  |  44 --
 .../math/cereal_registration/divide.cpp       |  44 --
 src/layers/math/cereal_registration/equal.cpp |  44 --
 .../math/cereal_registration/erfinv.cpp       |  44 --
 src/layers/math/cereal_registration/expm1.cpp |  44 --
 src/layers/math/cereal_registration/floor.cpp |  44 --
 .../math/cereal_registration/greater.cpp      |  44 --
 .../cereal_registration/greater_equal.cpp     |  44 --
 src/layers/math/cereal_registration/less.cpp  |  44 --
 .../math/cereal_registration/less_equal.cpp   |  44 --
 src/layers/math/cereal_registration/log1p.cpp |  44 --
 .../math/cereal_registration/logical_and.cpp  |  44 --
 .../math/cereal_registration/logical_not.cpp  |  44 --
 .../math/cereal_registration/logical_or.cpp   |  44 --
 .../math/cereal_registration/logical_xor.cpp  |  44 --
 src/layers/math/cereal_registration/max.cpp   |  44 --
 src/layers/math/cereal_registration/min.cpp   |  44 --
 src/layers/math/cereal_registration/mod.cpp   |  44 --
 .../math/cereal_registration/multiply.cpp     |  44 --
 .../math/cereal_registration/negative.cpp     |  44 --
 .../math/cereal_registration/not_equal.cpp    |  44 --
 src/layers/math/cereal_registration/pow.cpp   |  44 --
 .../math/cereal_registration/reciprocal.cpp   |  44 --
 src/layers/math/cereal_registration/round.cpp |  44 --
 src/layers/math/cereal_registration/rsqrt.cpp |  44 --
 .../math/cereal_registration/safe_divide.cpp  |  44 --
 .../cereal_registration/safe_reciprocal.cpp   |  44 --
 src/layers/math/cereal_registration/sign.cpp  |  44 --
 src/layers/math/cereal_registration/sinh.cpp  |  44 --
 src/layers/math/cereal_registration/sqrt.cpp  |  44 --
 .../math/cereal_registration/square.cpp       |  44 --
 .../squared_difference.cpp                    |  44 --
 .../math/cereal_registration/subtract.cpp     |  44 --
 src/layers/math/cereal_registration/tanh.cpp  |  44 --
 src/layers/math/math_builders.cpp             | 103 +---
 src/layers/math/unary.cpp                     | 514 ----------------
 src/layers/math/unary.cu                      | 481 ---------------
 src/models/unit_test/modify_test.cpp          |   1 -
 src/operators/CMakeLists.txt                  |   2 +
 src/operators/activations/CMakeLists.txt      |  18 +
 .../activations/activation_builders.cpp}      |  26 +-
 src/operators/activations/activations.cpp     | 204 +++++++
 src/operators/activations/activations.cu      | 197 ++++++
 .../cereal_registration/CMakeLists.txt        |  11 +
 .../cereal_registration/log_sigmoid.cpp       |  31 +
 .../activations/cereal_registration/selu.cpp  |  31 +
 .../cereal_registration/sigmoid.cpp           |  31 +
 .../cereal_registration/softplus.cpp          |  31 +
 .../cereal_registration/softsign.cpp          |  31 +
 src/operators/loss/CMakeLists.txt             |  18 +
 .../loss/cereal_registration/CMakeLists.txt   |  11 +
 .../binary_cross_entropy.cpp                  |  31 +
 .../cereal_registration/boolean_accuracy.cpp  |  31 +
 .../boolean_false_negative.cpp                |  31 +
 .../boolean_false_positive.cpp                |  31 +
 .../sigmoid_binary_cross_entropy.cpp          |  31 +
 src/operators/loss/entrywise.cpp              | 246 ++++++++
 src/operators/loss/entrywise.cu               | 243 ++++++++
 .../loss/loss_builders.cpp}                   |  26 +-
 src/operators/math/CMakeLists.txt             |  10 +
 src/operators/math/abs.cpp                    | 114 ++++
 src/operators/math/abs.cu                     | 108 ++++
 src/operators/math/binary.cpp                 | 543 +++++++++++++++++
 src/operators/math/binary.cu                  | 510 ++++++++++++++++
 .../math/cereal_registration/CMakeLists.txt   |  49 ++
 .../math/cereal_registration/abs.cpp          |  59 ++
 .../math/cereal_registration/acos.cpp         |  31 +
 .../math/cereal_registration/acosh.cpp        |  31 +
 .../math/cereal_registration/add.cpp          |  31 +
 .../math/cereal_registration/asin.cpp         |  31 +
 .../math/cereal_registration/asinh.cpp        |  31 +
 .../math/cereal_registration/atan.cpp         |  31 +
 .../math/cereal_registration/atanh.cpp        |  31 +
 .../math/cereal_registration/ceil.cpp         |  31 +
 .../math/cereal_registration/clamp.cpp        |   2 +-
 .../math/cereal_registration/cos.cpp          |  23 +-
 .../math/cereal_registration/cosh.cpp         |  31 +
 .../math/cereal_registration/divide.cpp       |  31 +
 .../math/cereal_registration/equal.cpp        |  31 +
 .../math/cereal_registration/erf.cpp          |  23 +-
 .../math/cereal_registration/erfinv.cpp       |  31 +
 .../math/cereal_registration/exp.cpp          |  23 +-
 .../math/cereal_registration/expm1.cpp        |  31 +
 .../math/cereal_registration/floor.cpp        |  31 +
 .../math/cereal_registration/greater.cpp      |  31 +
 .../cereal_registration/greater_equal.cpp     |  31 +
 .../math/cereal_registration/less.cpp         |  31 +
 .../math/cereal_registration/less_equal.cpp   |  31 +
 .../math/cereal_registration/log.cpp          |  23 +-
 .../math/cereal_registration/log1p.cpp        |  31 +
 .../math/cereal_registration/logical_and.cpp  |  31 +
 .../math/cereal_registration/logical_not.cpp  |  31 +
 .../math/cereal_registration/logical_or.cpp   |  31 +
 .../math/cereal_registration/logical_xor.cpp  |  31 +
 .../math/cereal_registration/max.cpp          |  31 +
 .../math/cereal_registration/min.cpp          |  31 +
 .../math/cereal_registration/mod.cpp          |  31 +
 .../math/cereal_registration/multiply.cpp     |  31 +
 .../math/cereal_registration/negative.cpp     |  31 +
 .../math/cereal_registration/not_equal.cpp    |  31 +
 .../math/cereal_registration/pow.cpp          |  31 +
 .../math/cereal_registration/reciprocal.cpp   |  31 +
 .../math/cereal_registration/round.cpp        |  31 +
 .../math/cereal_registration/rsqrt.cpp        |  31 +
 .../math/cereal_registration/safe_divide.cpp  |  31 +
 .../cereal_registration/safe_reciprocal.cpp   |  31 +
 .../math/cereal_registration/sign.cpp         |  31 +
 .../math/cereal_registration/sin.cpp          |  23 +-
 .../math/cereal_registration/sinh.cpp         |  31 +
 .../math/cereal_registration/sqrt.cpp         |  31 +
 .../math/cereal_registration/square.cpp       |  31 +
 .../squared_difference.cpp                    |  31 +
 .../math/cereal_registration/subtract.cpp     |  31 +
 .../math/cereal_registration/tan.cpp          |  23 +-
 .../math/cereal_registration/tanh.cpp         |  31 +
 src/operators/math/clamp.cpp                  |  78 +--
 src/operators/math/common.cuh                 | 240 ++++++++
 src/operators/math/common.hpp                 | 122 ++++
 src/operators/math/math_builders.cpp          |  69 ++-
 src/operators/math/unary.cpp                  | 567 ++++++++++++++++++
 src/operators/math/unary.cu                   | 486 +++++++++++++++
 src/operators/math/unit_test/CMakeLists.txt   |   6 +
 .../math/unit_test/OperatorTraits.hpp         | 100 +++
 src/operators/math/unit_test/abs_test.cpp     | 313 ++++++++++
 src/operators/math/unit_test/add_test.cpp     | 308 ++++++++++
 src/operators/math/unit_test/clamp_test.cpp   | 118 ++--
 src/operators/math/unit_test/cos_test.cpp     | 321 ++++++++++
 .../math/unit_test/multiply_test.cpp          | 310 ++++++++++
 src/operators/math/unit_test/sin_test.cpp     | 319 ++++++++++
 .../math/unit_test/subtract_test.cpp          | 312 ++++++++++
 src/proto/datatype.proto                      |   2 +
 src/proto/factories/layer_factory.cpp         |  63 +-
 src/proto/layers.proto                        | 139 +----
 src/proto/operators.proto                     |  98 +++
 unit_test/utilities/TestHelpers.hpp           |   6 +
 187 files changed, 9067 insertions(+), 5711 deletions(-)
 delete mode 100644 include/lbann/layers/loss/entrywise.hpp
 delete mode 100644 include/lbann/layers/math/binary.hpp
 delete mode 100644 include/lbann/layers/math/unary.hpp
 create mode 100644 include/lbann/operators/activations/CMakeLists.txt
 rename src/layers/math/cereal_registration/abs.cpp => include/lbann/operators/activations/activation_builders.hpp (66%)
 create mode 100644 include/lbann/operators/activations/activation_builders_impl.hpp
 rename include/lbann/{layers => operators}/activations/activations.hpp (63%)
 create mode 100644 include/lbann/operators/builder_macros.hpp
 create mode 100644 include/lbann/operators/declare_stateless_op.hpp
 create mode 100644 include/lbann/operators/loss/CMakeLists.txt
 rename src/layers/math/cereal_registration/acosh.cpp => include/lbann/operators/loss/entrywise.hpp (52%)
 rename src/layers/activations/cereal_registration/log_sigmoid.cpp => include/lbann/operators/loss/loss_builders.hpp (64%)
 rename src/layers/math/cereal_registration/acos.cpp => include/lbann/operators/loss/loss_builders_impl.hpp (58%)
 create mode 100644 include/lbann/operators/math/abs.hpp
 create mode 100644 include/lbann/operators/math/binary.hpp
 create mode 100644 include/lbann/operators/math/unary.hpp
 delete mode 100644 src/layers/activations/activations.cpp
 delete mode 100644 src/layers/activations/activations.cu
 delete mode 100644 src/layers/activations/cereal_registration/selu.cpp
 delete mode 100644 src/layers/activations/cereal_registration/sigmoid.cpp
 delete mode 100644 src/layers/loss/entrywise.cpp
 delete mode 100644 src/layers/math/binary.cpp
 delete mode 100644 src/layers/math/binary.cu
 delete mode 100644 src/layers/math/cereal_registration/add.cpp
 delete mode 100644 src/layers/math/cereal_registration/asin.cpp
 delete mode 100644 src/layers/math/cereal_registration/asinh.cpp
 delete mode 100644 src/layers/math/cereal_registration/atan.cpp
 delete mode 100644 src/layers/math/cereal_registration/atanh.cpp
 delete mode 100644 src/layers/math/cereal_registration/ceil.cpp
 delete mode 100644 src/layers/math/cereal_registration/cosh.cpp
 delete mode 100644 src/layers/math/cereal_registration/divide.cpp
 delete mode 100644 src/layers/math/cereal_registration/equal.cpp
 delete mode 100644 src/layers/math/cereal_registration/erfinv.cpp
 delete mode 100644 src/layers/math/cereal_registration/expm1.cpp
 delete mode 100644 src/layers/math/cereal_registration/floor.cpp
 delete mode 100644 src/layers/math/cereal_registration/greater.cpp
 delete mode 100644 src/layers/math/cereal_registration/greater_equal.cpp
 delete mode 100644 src/layers/math/cereal_registration/less.cpp
 delete mode 100644 src/layers/math/cereal_registration/less_equal.cpp
 delete mode 100644 src/layers/math/cereal_registration/log1p.cpp
 delete mode 100644 src/layers/math/cereal_registration/logical_and.cpp
 delete mode 100644 src/layers/math/cereal_registration/logical_not.cpp
 delete mode 100644 src/layers/math/cereal_registration/logical_or.cpp
 delete mode 100644 src/layers/math/cereal_registration/logical_xor.cpp
 delete mode 100644 src/layers/math/cereal_registration/max.cpp
 delete mode 100644 src/layers/math/cereal_registration/min.cpp
 delete mode 100644 src/layers/math/cereal_registration/mod.cpp
 delete mode 100644 src/layers/math/cereal_registration/multiply.cpp
 delete mode 100644 src/layers/math/cereal_registration/negative.cpp
 delete mode 100644 src/layers/math/cereal_registration/not_equal.cpp
 delete mode 100644 src/layers/math/cereal_registration/pow.cpp
 delete mode 100644 src/layers/math/cereal_registration/reciprocal.cpp
 delete mode 100644 src/layers/math/cereal_registration/round.cpp
 delete mode 100644 src/layers/math/cereal_registration/rsqrt.cpp
 delete mode 100644 src/layers/math/cereal_registration/safe_divide.cpp
 delete mode 100644 src/layers/math/cereal_registration/safe_reciprocal.cpp
 delete mode 100644 src/layers/math/cereal_registration/sign.cpp
 delete mode 100644 src/layers/math/cereal_registration/sinh.cpp
 delete mode 100644 src/layers/math/cereal_registration/sqrt.cpp
 delete mode 100644 src/layers/math/cereal_registration/square.cpp
 delete mode 100644 src/layers/math/cereal_registration/squared_difference.cpp
 delete mode 100644 src/layers/math/cereal_registration/subtract.cpp
 delete mode 100644 src/layers/math/cereal_registration/tanh.cpp
 delete mode 100644 src/layers/math/unary.cpp
 delete mode 100644 src/layers/math/unary.cu
 create mode 100644 src/operators/activations/CMakeLists.txt
 rename src/{layers/activations/cereal_registration/softsign.cpp => operators/activations/activation_builders.cpp} (63%)
 create mode 100644 src/operators/activations/activations.cpp
 create mode 100644 src/operators/activations/activations.cu
 create mode 100644 src/operators/activations/cereal_registration/CMakeLists.txt
 create mode 100644 src/operators/activations/cereal_registration/log_sigmoid.cpp
 create mode 100644 src/operators/activations/cereal_registration/selu.cpp
 create mode 100644 src/operators/activations/cereal_registration/sigmoid.cpp
 create mode 100644 src/operators/activations/cereal_registration/softplus.cpp
 create mode 100644 src/operators/activations/cereal_registration/softsign.cpp
 create mode 100644 src/operators/loss/CMakeLists.txt
 create mode 100644 src/operators/loss/cereal_registration/CMakeLists.txt
 create mode 100644 src/operators/loss/cereal_registration/binary_cross_entropy.cpp
 create mode 100644 src/operators/loss/cereal_registration/boolean_accuracy.cpp
 create mode 100644 src/operators/loss/cereal_registration/boolean_false_negative.cpp
 create mode 100644 src/operators/loss/cereal_registration/boolean_false_positive.cpp
 create mode 100644 src/operators/loss/cereal_registration/sigmoid_binary_cross_entropy.cpp
 create mode 100644 src/operators/loss/entrywise.cpp
 create mode 100644 src/operators/loss/entrywise.cu
 rename src/{layers/activations/cereal_registration/softplus.cpp => operators/loss/loss_builders.cpp} (63%)
 create mode 100644 src/operators/math/abs.cpp
 create mode 100644 src/operators/math/abs.cu
 create mode 100644 src/operators/math/binary.cpp
 create mode 100644 src/operators/math/binary.cu
 create mode 100644 src/operators/math/cereal_registration/abs.cpp
 create mode 100644 src/operators/math/cereal_registration/acos.cpp
 create mode 100644 src/operators/math/cereal_registration/acosh.cpp
 create mode 100644 src/operators/math/cereal_registration/add.cpp
 create mode 100644 src/operators/math/cereal_registration/asin.cpp
 create mode 100644 src/operators/math/cereal_registration/asinh.cpp
 create mode 100644 src/operators/math/cereal_registration/atan.cpp
 create mode 100644 src/operators/math/cereal_registration/atanh.cpp
 create mode 100644 src/operators/math/cereal_registration/ceil.cpp
 rename src/{layers => operators}/math/cereal_registration/cos.cpp (66%)
 create mode 100644 src/operators/math/cereal_registration/cosh.cpp
 create mode 100644 src/operators/math/cereal_registration/divide.cpp
 create mode 100644 src/operators/math/cereal_registration/equal.cpp
 rename src/{layers => operators}/math/cereal_registration/erf.cpp (66%)
 create mode 100644 src/operators/math/cereal_registration/erfinv.cpp
 rename src/{layers => operators}/math/cereal_registration/exp.cpp (66%)
 create mode 100644 src/operators/math/cereal_registration/expm1.cpp
 create mode 100644 src/operators/math/cereal_registration/floor.cpp
 create mode 100644 src/operators/math/cereal_registration/greater.cpp
 create mode 100644 src/operators/math/cereal_registration/greater_equal.cpp
 create mode 100644 src/operators/math/cereal_registration/less.cpp
 create mode 100644 src/operators/math/cereal_registration/less_equal.cpp
 rename src/{layers => operators}/math/cereal_registration/log.cpp (66%)
 create mode 100644 src/operators/math/cereal_registration/log1p.cpp
 create mode 100644 src/operators/math/cereal_registration/logical_and.cpp
 create mode 100644 src/operators/math/cereal_registration/logical_not.cpp
 create mode 100644 src/operators/math/cereal_registration/logical_or.cpp
 create mode 100644 src/operators/math/cereal_registration/logical_xor.cpp
 create mode 100644 src/operators/math/cereal_registration/max.cpp
 create mode 100644 src/operators/math/cereal_registration/min.cpp
 create mode 100644 src/operators/math/cereal_registration/mod.cpp
 create mode 100644 src/operators/math/cereal_registration/multiply.cpp
 create mode 100644 src/operators/math/cereal_registration/negative.cpp
 create mode 100644 src/operators/math/cereal_registration/not_equal.cpp
 create mode 100644 src/operators/math/cereal_registration/pow.cpp
 create mode 100644 src/operators/math/cereal_registration/reciprocal.cpp
 create mode 100644 src/operators/math/cereal_registration/round.cpp
 create mode 100644 src/operators/math/cereal_registration/rsqrt.cpp
 create mode 100644 src/operators/math/cereal_registration/safe_divide.cpp
 create mode 100644 src/operators/math/cereal_registration/safe_reciprocal.cpp
 create mode 100644 src/operators/math/cereal_registration/sign.cpp
 rename src/{layers => operators}/math/cereal_registration/sin.cpp (66%)
 create mode 100644 src/operators/math/cereal_registration/sinh.cpp
 create mode 100644 src/operators/math/cereal_registration/sqrt.cpp
 create mode 100644 src/operators/math/cereal_registration/square.cpp
 create mode 100644 src/operators/math/cereal_registration/squared_difference.cpp
 create mode 100644 src/operators/math/cereal_registration/subtract.cpp
 rename src/{layers => operators}/math/cereal_registration/tan.cpp (66%)
 create mode 100644 src/operators/math/cereal_registration/tanh.cpp
 create mode 100644 src/operators/math/common.cuh
 create mode 100644 src/operators/math/common.hpp
 create mode 100644 src/operators/math/unary.cpp
 create mode 100644 src/operators/math/unary.cu
 create mode 100644 src/operators/math/unit_test/OperatorTraits.hpp
 create mode 100644 src/operators/math/unit_test/abs_test.cpp
 create mode 100644 src/operators/math/unit_test/add_test.cpp
 create mode 100644 src/operators/math/unit_test/cos_test.cpp
 create mode 100644 src/operators/math/unit_test/multiply_test.cpp
 create mode 100644 src/operators/math/unit_test/sin_test.cpp
 create mode 100644 src/operators/math/unit_test/subtract_test.cpp

diff --git a/include/lbann/layers/loss/entrywise.hpp b/include/lbann/layers/loss/entrywise.hpp
deleted file mode 100644
index 6bb9ab1b15f..00000000000
--- a/include/lbann/layers/loss/entrywise.hpp
+++ /dev/null
@@ -1,76 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef LBANN_LAYERS_LOSS_ENTRYWISE_HPP_INCLUDED
-#define LBANN_LAYERS_LOSS_ENTRYWISE_HPP_INCLUDED
-
-#include "lbann/layers/math/binary.hpp"
-
-namespace lbann {
-
-#ifndef LBANN_ENTRYWISE_LAYER_INSTANTIATE
-#define BINARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, DEVICE)                 \
-  extern template class LAYER_NAME<T, data_layout::DATA_PARALLEL, DEVICE>; \
-  extern template class LAYER_NAME<T, data_layout::MODEL_PARALLEL, DEVICE>
-#else
-#define BINARY_ETI_DECL_MACRO_DEV(...)
-#endif // LBANN_BINARY_LAYER_INSTANTIATE
-
-#ifdef LBANN_HAS_GPU
-#define BINARY_ETI_DECL_MACRO(LAYER_NAME, T)                      \
-  BINARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::CPU);       \
-  BINARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::GPU)
-#else
-#define BINARY_ETI_DECL_MACRO(LAYER_NAME, T)                \
-  BINARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::CPU)
-#endif // LBANN_HAS_GPU
-
-// Convenience macro to define an entry-wise binary layer class
-#define DEFINE_ENTRYWISE_BINARY_LAYER(layer_name, layer_string)         \
-  LBANN_DECLARE_ENTRYWISE_BINARY_LAYER(layer_name, layer_string);       \
-  BINARY_ETI_DECL_MACRO(layer_name, float);                             \
-  BINARY_ETI_DECL_MACRO(layer_name, double)
-
-// Cross entropy loss
-DEFINE_ENTRYWISE_BINARY_LAYER(binary_cross_entropy_layer,
-                              "binary cross entropy");
-DEFINE_ENTRYWISE_BINARY_LAYER(sigmoid_binary_cross_entropy_layer,
-                              "sigmoid binary cross entropy");
-
-// Boolean loss functions
-DEFINE_ENTRYWISE_BINARY_LAYER(boolean_accuracy_layer, "Boolean accuracy");
-DEFINE_ENTRYWISE_BINARY_LAYER(boolean_false_negative_layer,
-                              "Boolean false negative rate");
-DEFINE_ENTRYWISE_BINARY_LAYER(boolean_false_positive_layer,
-                              "Boolean false positive rate");
-
-} // namespace lbann
-
-#undef DEFINE_ENTRYWISE_BINARY_LAYER
-#undef BINARY_ETI_DECL_MACRO
-#undef BINARY_ETI_DECL_MACRO_DEV
-
-#endif // LBANN_LAYERS_LOSS_ENTRYWISE_HPP_INCLUDED
diff --git a/include/lbann/layers/math/CMakeLists.txt b/include/lbann/layers/math/CMakeLists.txt
index 62239b4eb22..dedbae16d91 100644
--- a/include/lbann/layers/math/CMakeLists.txt
+++ b/include/lbann/layers/math/CMakeLists.txt
@@ -1,7 +1,5 @@
 # Add the headers for this directory
 set_full_path(THIS_DIR_HEADERS
-  unary.hpp
-  binary.hpp
   matmul.hpp
   )
 
diff --git a/include/lbann/layers/math/binary.hpp b/include/lbann/layers/math/binary.hpp
deleted file mode 100644
index 731d600db35..00000000000
--- a/include/lbann/layers/math/binary.hpp
+++ /dev/null
@@ -1,142 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef LBANN_LAYERS_MATH_BINARY_HPP_INCLUDED
-#define LBANN_LAYERS_MATH_BINARY_HPP_INCLUDED
-
-#include "lbann/layers/data_type_layer.hpp"
-
-namespace lbann {
-
-#define LBANN_DECLARE_ENTRYWISE_BINARY_LAYER(LAYER_NAME, LAYER_STRING)      \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  class LAYER_NAME : public data_type_layer<TensorDataType> {               \
-  public:                                                                   \
-    LAYER_NAME(lbann_comm *comm) : data_type_layer<TensorDataType>(comm) {  \
-      this->m_expected_num_parent_layers = 2;                               \
-    }                                                                       \
-    LAYER_NAME() : LAYER_NAME(nullptr) {}                                   \
-    LAYER_NAME* copy() const override {                                     \
-      return new LAYER_NAME<TensorDataType,Layout,Device>(*this);           \
-    }                                                                       \
-    std::string get_type() const override { return LAYER_STRING; }          \
-    data_layout get_data_layout() const override { return Layout; }         \
-    El::Device get_device_allocation() const override { return Device; }    \
-    template <typename ArchiveT>                                            \
-    void serialize(ArchiveT& ar);                                           \
-  protected:                                                                \
-    void setup_dims(DataReaderMetaData& dr_metadata) override {             \
-      data_type_layer<TensorDataType>::setup_dims(dr_metadata);             \
-      this->set_output_dims(this->get_input_dims());                        \
-      /* Check that input dimensions match */                               \
-      if (this->get_input_dims(0) != this->get_input_dims(1)) {             \
-        const auto& parents = this->get_parent_layers();                    \
-        std::stringstream err;                                              \
-        err << this->get_type() << " layer \"" << this->get_name() << "\" " \
-            << "has input tensors with different dimensions (";             \
-        for (int i = 0; i < this->get_num_parents(); ++i) {                 \
-          const auto& dims = this->get_input_dims(i);                       \
-          err << (i > 0 ? ", " : "")                                        \
-              << "layer \"" << parents[i]->get_name() << "\" outputs ";     \
-          for (size_t j = 0; j < dims.size(); ++j) {                        \
-            err << (j > 0 ? " x " : "") << dims[j];                         \
-          }                                                                 \
-        }                                                                   \
-        err << ")";                                                         \
-        LBANN_ERROR(err.str());                                             \
-      }                                                                     \
-    }                                                                       \
-    void fp_compute() override;                                             \
-    void bp_compute() override;                                             \
-  }
-
-// Convenience macros for ETI decls for binary layers
-
-#ifndef LBANN_BINARY_LAYER_INSTANTIATE
-#define BINARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, DEVICE)                   \
-  extern template class LAYER_NAME<T, data_layout::DATA_PARALLEL, DEVICE>; \
-  extern template class LAYER_NAME<T, data_layout::MODEL_PARALLEL, DEVICE>
-#else
-#define BINARY_ETI_DECL_MACRO_DEV(...)
-#endif // LBANN_BINARY_LAYER_INSTANTIATE
-
-// Instnatiate both data and model parallel layers
-#define BINARY_ETI_INST_MACRO_DEV_DT(LAYER_NAME, T, DEVICE)             \
-  template class LAYER_NAME<T, data_layout::DATA_PARALLEL, DEVICE>;  \
-  template class LAYER_NAME<T, data_layout::MODEL_PARALLEL, DEVICE>
-
-// Instantiate a DEVICE for each allowed tensor data type
-#define BINARY_ETI_INST_MACRO_DEV(LAYER_NAME, DEVICE)      \
-  BINARY_ETI_INST_MACRO_DEV_DT(LAYER_NAME, float, DEVICE); \
-  BINARY_ETI_INST_MACRO_DEV_DT(LAYER_NAME, double, DEVICE)
-
-#ifdef LBANN_HAS_GPU
-#define BINARY_ETI_DECL_MACRO(LAYER_NAME, T)                 \
-  BINARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::CPU); \
-  BINARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::GPU)
-#else
-#define BINARY_ETI_DECL_MACRO(LAYER_NAME, T)                 \
-  BINARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::CPU)
-#endif // LBANN_HAS_GPU
-
-// Convenience macro to define an entry-wise binary layer class
-#define DEFINE_ENTRYWISE_BINARY_LAYER(layer_name, layer_string)         \
-  LBANN_DECLARE_ENTRYWISE_BINARY_LAYER(layer_name, layer_string);       \
-  BINARY_ETI_DECL_MACRO(layer_name, float);                             \
-  BINARY_ETI_DECL_MACRO(layer_name, double)
-
-// Arithmetic operations
-DEFINE_ENTRYWISE_BINARY_LAYER(add_layer,                "add");
-DEFINE_ENTRYWISE_BINARY_LAYER(subtract_layer,           "subtract");
-DEFINE_ENTRYWISE_BINARY_LAYER(multiply_layer,           "multiply");
-DEFINE_ENTRYWISE_BINARY_LAYER(divide_layer,             "divide");
-DEFINE_ENTRYWISE_BINARY_LAYER(mod_layer,                "modulo");
-DEFINE_ENTRYWISE_BINARY_LAYER(pow_layer,                "power");
-DEFINE_ENTRYWISE_BINARY_LAYER(safe_divide_layer,        "safe divide");
-DEFINE_ENTRYWISE_BINARY_LAYER(squared_difference_layer, "squared difference");
-
-// Comparison operations
-DEFINE_ENTRYWISE_BINARY_LAYER(max_layer,           "maximum");
-DEFINE_ENTRYWISE_BINARY_LAYER(min_layer,           "minimum");
-DEFINE_ENTRYWISE_BINARY_LAYER(equal_layer,         "equal");
-DEFINE_ENTRYWISE_BINARY_LAYER(not_equal_layer,     "not equal");
-DEFINE_ENTRYWISE_BINARY_LAYER(less_layer,          "less than");
-DEFINE_ENTRYWISE_BINARY_LAYER(less_equal_layer,    "less than or equal");
-DEFINE_ENTRYWISE_BINARY_LAYER(greater_layer,       "greater than");
-DEFINE_ENTRYWISE_BINARY_LAYER(greater_equal_layer, "greater than or equal");
-
-// Logical operations
-DEFINE_ENTRYWISE_BINARY_LAYER(logical_and_layer, "logical and");
-DEFINE_ENTRYWISE_BINARY_LAYER(logical_or_layer,  "logical or");
-DEFINE_ENTRYWISE_BINARY_LAYER(logical_xor_layer, "logical xor");
-
-} // namespace lbann
-
-#undef DEFINE_ENTRYWISE_BINARY_LAYER
-#undef BINARY_ETI_DECL_MACRO
-#undef BINARY_ETI_DECL_MACRO_DEV
-
-#endif // LBANN_LAYERS_MATH_BINARY_HPP_INCLUDED
diff --git a/include/lbann/layers/math/math_builders.hpp b/include/lbann/layers/math/math_builders.hpp
index 6d81bad4bea..eb476b163cc 100644
--- a/include/lbann/layers/math/math_builders.hpp
+++ b/include/lbann/layers/math/math_builders.hpp
@@ -29,55 +29,6 @@
 namespace lbann
 {
 
-LBANN_DEFINE_LAYER_BUILDER(abs);
-LBANN_DEFINE_LAYER_BUILDER(acos);
-LBANN_DEFINE_LAYER_BUILDER(acosh);
-LBANN_DEFINE_LAYER_BUILDER(add);
-LBANN_DEFINE_LAYER_BUILDER(asin);
-LBANN_DEFINE_LAYER_BUILDER(asinh);
-LBANN_DEFINE_LAYER_BUILDER(atan);
-LBANN_DEFINE_LAYER_BUILDER(atanh);
-LBANN_DEFINE_LAYER_BUILDER(ceil);
-LBANN_DEFINE_LAYER_BUILDER(cos);
-LBANN_DEFINE_LAYER_BUILDER(cosh);
-LBANN_DEFINE_LAYER_BUILDER(divide);
-LBANN_DEFINE_LAYER_BUILDER(equal);
-LBANN_DEFINE_LAYER_BUILDER(exp);
-LBANN_DEFINE_LAYER_BUILDER(expm1);
-LBANN_DEFINE_LAYER_BUILDER(floor);
-LBANN_DEFINE_LAYER_BUILDER(greater);
-LBANN_DEFINE_LAYER_BUILDER(greater_equal);
-LBANN_DEFINE_LAYER_BUILDER(erf);
-LBANN_DEFINE_LAYER_BUILDER(erfinv);
-LBANN_DEFINE_LAYER_BUILDER(less);
-LBANN_DEFINE_LAYER_BUILDER(less_equal);
-LBANN_DEFINE_LAYER_BUILDER(log);
-LBANN_DEFINE_LAYER_BUILDER(log1p);
-LBANN_DEFINE_LAYER_BUILDER(logical_and);
-LBANN_DEFINE_LAYER_BUILDER(logical_not);
-LBANN_DEFINE_LAYER_BUILDER(logical_or);
-LBANN_DEFINE_LAYER_BUILDER(logical_xor);
 LBANN_DEFINE_LAYER_BUILDER(matmul);
-LBANN_DEFINE_LAYER_BUILDER(max);
-LBANN_DEFINE_LAYER_BUILDER(min);
-LBANN_DEFINE_LAYER_BUILDER(mod);
-LBANN_DEFINE_LAYER_BUILDER(multiply);
-LBANN_DEFINE_LAYER_BUILDER(negative);
-LBANN_DEFINE_LAYER_BUILDER(not_equal);
-LBANN_DEFINE_LAYER_BUILDER(pow);
-LBANN_DEFINE_LAYER_BUILDER(reciprocal);
-LBANN_DEFINE_LAYER_BUILDER(round);
-LBANN_DEFINE_LAYER_BUILDER(rsqrt);
-LBANN_DEFINE_LAYER_BUILDER(safe_divide);
-LBANN_DEFINE_LAYER_BUILDER(safe_reciprocal);
-LBANN_DEFINE_LAYER_BUILDER(sign);
-LBANN_DEFINE_LAYER_BUILDER(sin);
-LBANN_DEFINE_LAYER_BUILDER(sinh);
-LBANN_DEFINE_LAYER_BUILDER(sqrt);
-LBANN_DEFINE_LAYER_BUILDER(square);
-LBANN_DEFINE_LAYER_BUILDER(squared_difference);
-LBANN_DEFINE_LAYER_BUILDER(subtract);
-LBANN_DEFINE_LAYER_BUILDER(tan);
-LBANN_DEFINE_LAYER_BUILDER(tanh);
 
 }// namespace lbann
diff --git a/include/lbann/layers/math/unary.hpp b/include/lbann/layers/math/unary.hpp
deleted file mode 100644
index 6a7fc4f1382..00000000000
--- a/include/lbann/layers/math/unary.hpp
+++ /dev/null
@@ -1,142 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef LBANN_LAYERS_MATH_UNARY_HPP_INCLUDED
-#define LBANN_LAYERS_MATH_UNARY_HPP_INCLUDED
-
-#include "lbann/layers/data_type_layer.hpp"
-
-namespace lbann {
-
-#define LBANN_DECLARE_ENTRYWISE_UNARY_LAYER(LAYER_NAME, LAYER_STRING)       \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  class LAYER_NAME : public data_type_layer<TensorDataType> {               \
-  public:                                                                   \
-  LAYER_NAME(lbann_comm *comm) : data_type_layer<TensorDataType>(comm) {}   \
-  LAYER_NAME() : LAYER_NAME(nullptr) {}                                     \
-    LAYER_NAME* copy() const override {                                     \
-      return new LAYER_NAME<TensorDataType,Layout,Device>(*this);           \
-    }                                                                       \
-    std::string get_type() const override { return LAYER_STRING; }          \
-    data_layout get_data_layout() const override { return Layout; }         \
-    El::Device get_device_allocation() const override { return Device; }    \
-    template <typename ArchiveT>                                            \
-    void serialize(ArchiveT& ar);                                           \
-  protected:                                                                \
-    void setup_dims(DataReaderMetaData& dr_metadata) override {             \
-      data_type_layer<TensorDataType>::setup_dims(dr_metadata);             \
-      this->set_output_dims(this->get_input_dims());                        \
-    }                                                                       \
-    void fp_compute() override;                                             \
-    void bp_compute() override;                                             \
-  }
-
-// Convenience macros for ETI decls for unary layers
-
-#ifndef LBANN_UNARY_LAYER_INSTANTIATE
-#define UNARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, DEVICE)                   \
-  extern template class LAYER_NAME<T, data_layout::DATA_PARALLEL, DEVICE>; \
-  extern template class LAYER_NAME<T, data_layout::MODEL_PARALLEL, DEVICE>
-#else
-#define UNARY_ETI_DECL_MACRO_DEV(...)
-#endif // LBANN_UNARY_LAYER_INSTANTIATE
-
-#define UNARY_ETI_INST_MACRO_DEV_DT(LAYER_NAME, T, DEVICE)          \
-  template class LAYER_NAME<T, data_layout::DATA_PARALLEL, DEVICE>; \
-  template class LAYER_NAME<T, data_layout::MODEL_PARALLEL, DEVICE>
-
-#define UNARY_ETI_INST_MACRO_DEV(LAYER_NAME, DEVICE)      \
-  UNARY_ETI_INST_MACRO_DEV_DT(LAYER_NAME, float, DEVICE); \
-  UNARY_ETI_INST_MACRO_DEV_DT(LAYER_NAME, double, DEVICE)
-
-#ifdef LBANN_HAS_GPU
-#define UNARY_ETI_DECL_MACRO(LAYER_NAME, T)                      \
-  UNARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::CPU);       \
-  UNARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::GPU)
-#else
-#define UNARY_ETI_DECL_MACRO(LAYER_NAME, T)               \
-  UNARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::CPU)
-#endif // LBANN_HAS_GPU
-
-// Convenience macro to define an entry-wise unary layer class
-#define DEFINE_ENTRYWISE_UNARY_LAYER(layer_name, layer_string)    \
-  LBANN_DECLARE_ENTRYWISE_UNARY_LAYER(layer_name, layer_string);  \
-  UNARY_ETI_DECL_MACRO(layer_name, float);                        \
-  UNARY_ETI_DECL_MACRO(layer_name, double)
-
-// Logical operations
-DEFINE_ENTRYWISE_UNARY_LAYER(logical_not_layer, "logical not");
-
-// Sign operations
-DEFINE_ENTRYWISE_UNARY_LAYER(abs_layer,      "absolute value");
-DEFINE_ENTRYWISE_UNARY_LAYER(negative_layer, "negative");
-DEFINE_ENTRYWISE_UNARY_LAYER(sign_layer,     "sign");
-
-// Rounding operations
-DEFINE_ENTRYWISE_UNARY_LAYER(round_layer, "round");
-DEFINE_ENTRYWISE_UNARY_LAYER(ceil_layer,  "ceil");
-DEFINE_ENTRYWISE_UNARY_LAYER(floor_layer, "floor");
-
-// Power operations
-DEFINE_ENTRYWISE_UNARY_LAYER(reciprocal_layer,      "reciprocal");
-DEFINE_ENTRYWISE_UNARY_LAYER(square_layer,          "square");
-DEFINE_ENTRYWISE_UNARY_LAYER(sqrt_layer,            "square root");
-DEFINE_ENTRYWISE_UNARY_LAYER(rsqrt_layer,           "reciprocal square root");
-DEFINE_ENTRYWISE_UNARY_LAYER(safe_reciprocal_layer, "safe reciprocal");
-
-// Exponential and logarithmic operations
-DEFINE_ENTRYWISE_UNARY_LAYER(exp_layer,   "exponential");
-DEFINE_ENTRYWISE_UNARY_LAYER(expm1_layer, "expm1");
-DEFINE_ENTRYWISE_UNARY_LAYER(log_layer,   "natural logarithm");
-DEFINE_ENTRYWISE_UNARY_LAYER(log1p_layer, "log1p");
-
-// Trigonometric operations
-DEFINE_ENTRYWISE_UNARY_LAYER(cos_layer,  "cosine");
-DEFINE_ENTRYWISE_UNARY_LAYER(sin_layer,  "sine");
-DEFINE_ENTRYWISE_UNARY_LAYER(tan_layer,  "tangent");
-DEFINE_ENTRYWISE_UNARY_LAYER(acos_layer, "arccosine");
-DEFINE_ENTRYWISE_UNARY_LAYER(asin_layer, "arcsine");
-DEFINE_ENTRYWISE_UNARY_LAYER(atan_layer, "arctangent");
-
-// Hyperbolic operations
-DEFINE_ENTRYWISE_UNARY_LAYER(cosh_layer,  "hyperbolic cosine");
-DEFINE_ENTRYWISE_UNARY_LAYER(sinh_layer,  "hyperbolic sine");
-DEFINE_ENTRYWISE_UNARY_LAYER(tanh_layer,  "hyperbolic tangent");
-DEFINE_ENTRYWISE_UNARY_LAYER(acosh_layer, "hyperbolic arccosine");
-DEFINE_ENTRYWISE_UNARY_LAYER(asinh_layer, "hyperbolic arcsine");
-DEFINE_ENTRYWISE_UNARY_LAYER(atanh_layer, "hyperbolic arctangent");
-
-// Error function
-DEFINE_ENTRYWISE_UNARY_LAYER(erf_layer, "error function");
-DEFINE_ENTRYWISE_UNARY_LAYER(erfinv_layer, "inverse error function");
-
-} // namespace lbann
-
-#undef DEFINE_ENTRYWISE_UNARY_LAYER
-#undef UNARY_ETI_DECL_MACRO
-#undef UNARY_ETI_DECL_MACRO_DEV
-
-#endif // LBANN_LAYERS_MATH_UNARY_HPP_INCLUDED
diff --git a/include/lbann/layers/operator_layer_impl.hpp b/include/lbann/layers/operator_layer_impl.hpp
index 49471e43406..23cf2f5a6c6 100644
--- a/include/lbann/layers/operator_layer_impl.hpp
+++ b/include/lbann/layers/operator_layer_impl.hpp
@@ -48,6 +48,7 @@ OperatorLayer<InputT, OutputT, Layout, D>::OperatorLayer(lbann_comm& comm,
   LBANN_ASSERT(op);
   m_ops.reserve(1);
   m_ops.emplace_back(std::move(op));
+  this->m_expected_num_parent_layers = -1; // No limit on parents
 }
 
 template <typename InputT, typename OutputT, data_layout Layout, El::Device D>
@@ -58,6 +59,7 @@ OperatorLayer<InputT, OutputT, Layout, D>::OperatorLayer(
 {
   LBANN_ASSERT(m_ops.size() == 1UL); // For starters.
   LBANN_ASSERT(m_ops[0]);
+  this->m_expected_num_parent_layers = -1; // No limit on parents
 }
 
 template <typename InputT, typename OutputT, data_layout Layout, El::Device D>
@@ -261,5 +263,4 @@ namespace lbann {
 
 } // namespace lbann
 #endif // LBANN_INSTANTIATE_OPERATOR_LAYER
-
 #endif // LBANN_LAYERS_OPERATOR_LAYER_IMPL_HPP_INCLUDED
diff --git a/include/lbann/lbann.hpp b/include/lbann/lbann.hpp
index fc1e06dd0fb..d67a143bbdd 100644
--- a/include/lbann/lbann.hpp
+++ b/include/lbann/lbann.hpp
@@ -41,7 +41,6 @@
 #include "lbann/models/directed_acyclic_graph.hpp"
 
 /// Activation layers
-#include "lbann/layers/activations/activations.hpp"
 #include "lbann/layers/activations/elu.hpp"
 #include "lbann/layers/activations/identity.hpp"
 #include "lbann/layers/activations/leaky_relu.hpp"
@@ -63,7 +62,6 @@
 /// Loss layers
 #include "lbann/layers/loss/categorical_accuracy.hpp"
 #include "lbann/layers/loss/cross_entropy.hpp"
-#include "lbann/layers/loss/entrywise.hpp"
 #include "lbann/layers/loss/l1_norm.hpp"
 #include "lbann/layers/loss/l2_norm2.hpp"
 #include "lbann/layers/loss/mean_absolute_error.hpp"
@@ -71,8 +69,7 @@
 #include "lbann/layers/loss/top_k_categorical_accuracy.hpp"
 
 /// Math layers
-#include "lbann/layers/math/unary.hpp"
-#include "lbann/layers/math/binary.hpp"
+#include "lbann/layers/math/matmul.hpp"
 
 /// Transform layers
 #include "lbann/layers/transform/reshape.hpp"
diff --git a/include/lbann/operators/CMakeLists.txt b/include/lbann/operators/CMakeLists.txt
index 09e88768f5d..2ad05f7cd98 100644
--- a/include/lbann/operators/CMakeLists.txt
+++ b/include/lbann/operators/CMakeLists.txt
@@ -4,6 +4,7 @@ set_full_path(THIS_DIR_HEADERS
   operator.hpp
   )
 
+add_subdirectory(activations)
 add_subdirectory(math)
 
 # Propagate the files up the tree
diff --git a/include/lbann/operators/activations/CMakeLists.txt b/include/lbann/operators/activations/CMakeLists.txt
new file mode 100644
index 00000000000..d0d8d6449d7
--- /dev/null
+++ b/include/lbann/operators/activations/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Add the headers for this directory
+set_full_path(THIS_DIR_HEADERS
+  activation_builders.hpp
+  activation_builders_impl.hpp
+  activations.hpp
+  )
+
+# Propagate the files up the tree
+set(HEADERS "${HEADERS}" "${THIS_DIR_HEADERS}" PARENT_SCOPE)
diff --git a/src/layers/math/cereal_registration/abs.cpp b/include/lbann/operators/activations/activation_builders.hpp
similarity index 66%
rename from src/layers/math/cereal_registration/abs.cpp
rename to include/lbann/operators/activations/activation_builders.hpp
index 8df83c1e677..b9ef26f44ee 100644
--- a/src/layers/math/cereal_registration/abs.cpp
+++ b/include/lbann/operators/activations/activation_builders.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,15 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
+
+#include "lbann/operators/builder_macros.hpp"
 
 namespace lbann {
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-abs_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
+LBANN_DECLARE_OPERATOR_BUILDER(log_sigmoid);
+LBANN_DECLARE_OPERATOR_BUILDER(selu);
+LBANN_DECLARE_OPERATOR_BUILDER(sigmoid);
+LBANN_DECLARE_OPERATOR_BUILDER(softplus);
+LBANN_DECLARE_OPERATOR_BUILDER(softsign);
 
 } // namespace lbann
-
-#define LBANN_LAYER_NAME abs_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/include/lbann/operators/activations/activation_builders_impl.hpp b/include/lbann/operators/activations/activation_builders_impl.hpp
new file mode 100644
index 00000000000..c3315764f1d
--- /dev/null
+++ b/include/lbann/operators/activations/activation_builders_impl.hpp
@@ -0,0 +1,43 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_ACTIVATIONS_ACTIVATION_BUILDERS_IMPL_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_ACTIVATIONS_ACTIVATION_BUILDERS_IMPL_HPP_INCLUDED
+
+#include "lbann/operators/activations/activation_builders.hpp"
+#include "lbann/operators/activations/activations.hpp"
+
+#include "lbann/operators/builder_macros.hpp"
+
+// Default builder definitions for the stateless activation operators. These
+// expand to function template definitions, so the include guard above keeps a
+// second inclusion in the same translation unit from redefining them.
+LBANN_DEFINE_OPERATOR_BUILDER(log_sigmoid, LogSigmoid)
+LBANN_DEFINE_OPERATOR_BUILDER(selu, Selu)
+LBANN_DEFINE_OPERATOR_BUILDER(sigmoid, Sigmoid)
+LBANN_DEFINE_OPERATOR_BUILDER(softplus, Softplus)
+LBANN_DEFINE_OPERATOR_BUILDER(softsign, Softsign)
+
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_ACTIVATIONS_ACTIVATION_BUILDERS_IMPL_HPP_INCLUDED
diff --git a/include/lbann/layers/activations/activations.hpp b/include/lbann/operators/activations/activations.hpp
similarity index 63%
rename from include/lbann/layers/activations/activations.hpp
rename to include/lbann/operators/activations/activations.hpp
index 24d11fade27..d2a046df275 100644
--- a/include/lbann/layers/activations/activations.hpp
+++ b/include/lbann/operators/activations/activations.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -24,45 +24,20 @@
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
 
-#ifndef LBANN_LAYERS_ACTIVATIONS_ACTIVATIONS_HPP_INCLUDED
-#define LBANN_LAYERS_ACTIVATIONS_ACTIVATIONS_HPP_INCLUDED
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_ACTIVATIONS_ACTIVATIONS_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_ACTIVATIONS_ACTIVATIONS_HPP_INCLUDED
 
-#include "lbann/layers/math/unary.hpp"
+#include <lbann/operators/declare_stateless_op.hpp>
 
 namespace lbann {
 
-// Convenience macros for ETI decls for unary layers
-
-#ifndef LBANN_ACTIVATIONS_LAYER_INSTANTIATE
-#define UNARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, DEVICE)                   \
-  extern template class LAYER_NAME<T, data_layout::DATA_PARALLEL, DEVICE>; \
-  extern template class LAYER_NAME<T, data_layout::MODEL_PARALLEL, DEVICE>
-#else
-#define UNARY_ETI_DECL_MACRO_DEV(...)
-#endif // LBANN_UNARY_LAYER_INSTANTIATE
-
-#ifdef LBANN_HAS_GPU
-#define UNARY_ETI_DECL_MACRO(LAYER_NAME, T)                  \
-  UNARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::CPU);  \
-  UNARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::GPU)
-#else
-#define UNARY_ETI_DECL_MACRO(LAYER_NAME, T)                 \
-  UNARY_ETI_DECL_MACRO_DEV(LAYER_NAME, T, El::Device::CPU)
-#endif // LBANN_HAS_GPU
-
-// Convenience macro to define an entry-wise unary layer class
-#define DEFINE_ENTRYWISE_UNARY_LAYER(layer_name, layer_string)    \
-  LBANN_DECLARE_ENTRYWISE_UNARY_LAYER(layer_name, layer_string);  \
-  UNARY_ETI_DECL_MACRO(layer_name, float);                        \
-  UNARY_ETI_DECL_MACRO(layer_name, double)
-
 /** @class lbann::log_sigmoid_layer
  *  @brief Logarithm of sigmoid function.
  *
  *  @f[ \log(\sigma(x)) = -\log(1 + e^{-x}) @f]
  *  See https://en.wikipedia.org/wiki/Sigmoid_function.
  */
-DEFINE_ENTRYWISE_UNARY_LAYER(log_sigmoid_layer, "log sigmoid");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(LogSigmoid, "log sigmoid");
 
 /** @class lbann::selu_layer
  *  @brief Scaled exponential rectified linear unit.
@@ -81,7 +56,7 @@ DEFINE_ENTRYWISE_UNARY_LAYER(log_sigmoid_layer, "log sigmoid");
  *  Hochreiter. "Self-normalizing neural networks." In Advances in
  *  Neural Information Processing Systems, pp. 971-980. 2017.
  */
-DEFINE_ENTRYWISE_UNARY_LAYER(selu_layer, "SELU");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Selu, "SELU");
 
 /** @class lbann::sigmoid_layer
  *  @brief Special case of logistic function.
@@ -89,7 +64,7 @@ DEFINE_ENTRYWISE_UNARY_LAYER(selu_layer, "SELU");
  *  @f[ \sigma(x) = \frac{1}{1 + e^{-x}} @f]
  *  See https://en.wikipedia.org/wiki/Sigmoid_function.
  */
-DEFINE_ENTRYWISE_UNARY_LAYER(sigmoid_layer, "sigmoid");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Sigmoid, "sigmoid");
 // Sigmoid function output is strictly in (0,1)
 // Note: Output is in the range [eps,1-eps], where 'eps' is machine
 // epsilon. This avoids denormalized floats and helps mitigate some
@@ -102,14 +77,14 @@ DEFINE_ENTRYWISE_UNARY_LAYER(sigmoid_layer, "sigmoid");
  *  @f[ \text{softplus}(x) = \log (e^x + 1) @f]
  *  See https://en.wikipedia.org/wiki/Rectifier_(neural_networks)
  */
-DEFINE_ENTRYWISE_UNARY_LAYER(softplus_layer, "softplus");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Softplus, "softplus");
 
 /** @class lbann::softsign_layer
  *  @brief Smooth approximation to sign function.
  *
  *  @f[ \text{softsign}(x) = \frac{x}{1 + |x|} @f]
  */
-DEFINE_ENTRYWISE_UNARY_LAYER(softsign_layer, "softsign");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Softsign, "softsign");
 
 } // namespace lbann
 
@@ -117,4 +92,4 @@ DEFINE_ENTRYWISE_UNARY_LAYER(softsign_layer, "softsign");
 #undef UNARY_ETI_DECL_MACRO
 #undef UNARY_ETI_DECL_MACRO_DEV
 
-#endif // LBANN_LAYERS_ACTIVATIONS_ACTIVATIONS_HPP_INCLUDED
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_ACTIVATIONS_ACTIVATIONS_HPP_INCLUDED
diff --git a/include/lbann/operators/builder_macros.hpp b/include/lbann/operators/builder_macros.hpp
new file mode 100644
index 00000000000..cf2a1520f3a
--- /dev/null
+++ b/include/lbann/operators/builder_macros.hpp
@@ -0,0 +1,88 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_BUILDER_MACROS_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_BUILDER_MACROS_HPP_INCLUDED
+
+#include "lbann/operators/operator.hpp"
+#include "lbann/proto/datatype_helpers.hpp"
+
+#include <operators.pb.h>
+
+#include <memory>
+
+// Forward declaration
+namespace lbann_data {
+class Operator;
+} // namespace lbann_data
+
+namespace lbann {
+namespace details {
+/** @brief Assert that InputT, OutputT, and D agree with @c op. */
+template <typename InputT, typename OutputT, El::Device D>
+void AssertConsistentTypeParameters(lbann_data::Operator const& op)
+{
+  LBANN_ASSERT(proto::ProtoDataType<InputT> == op.input_datatype());
+  LBANN_ASSERT(proto::ProtoDataType<OutputT> == op.output_datatype());
+  LBANN_ASSERT(proto::ProtoDevice<D> ==
+               proto::resolve_default_device(op.device_allocation()));
+}
+
+} // namespace details
+} // namespace lbann
+
+/** @brief A utility macro for adding a builder declaration for a single-type
+ *         operator; declares build_<OP_NAME>_operator<DataT, D>.
+ *  @note Must be called inside lbann namespace.
+ */
+#define LBANN_DECLARE_OPERATOR_BUILDER(OP_NAME)                                \
+  template <typename DataT, El::Device D>                                      \
+  std::unique_ptr<Operator<DataT, DataT, D>> build_##OP_NAME##_operator(       \
+    lbann_data::Operator const& op)
+
+/** @brief A utility macro for easily adding a default builder with
+ *         dynamic type-checking assertions.
+ *
+ *  Type-checking is only done with Debug builds. The generated
+ *  lbann::build_<OP_LOWER>_operator returns a new <OP_NAME>Operator<DataT, D>.
+ *  @note Must *NOT* be called inside lbann namespace.
+ */
+#define LBANN_DEFINE_OPERATOR_BUILDER(OP_LOWER, OP_NAME)                       \
+  template <typename DataT, El::Device D>                                      \
+  std::unique_ptr<lbann::Operator<DataT, DataT, D>>                            \
+    lbann::build_##OP_LOWER##_operator(lbann_data::Operator const& op)         \
+  {                                                                            \
+    details::AssertConsistentTypeParameters<DataT, DataT, D>(op);              \
+    return std::make_unique<OP_NAME##Operator<DataT, D>>();                    \
+  }
+
+/** @brief Adds ETI (explicit instantiation) of build_<OPERATOR_NAME>_operator.
+ *  @note Must *NOT* be called inside lbann namespace.
+ */
+#define LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(OPERATOR_NAME, T, D)            \
+  template std::unique_ptr<lbann::Operator<T, T, D>>                           \
+    lbann::build_##OPERATOR_NAME##_operator<T, D>(lbann_data::Operator const&)
+
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_BUILDER_MACROS_HPP_INCLUDED
diff --git a/include/lbann/operators/declare_stateless_op.hpp b/include/lbann/operators/declare_stateless_op.hpp
new file mode 100644
index 00000000000..3d9ab782dc4
--- /dev/null
+++ b/include/lbann/operators/declare_stateless_op.hpp
@@ -0,0 +1,125 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_DECLARE_STATELESS_OP_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_DECLARE_STATELESS_OP_HPP_INCLUDED
+
+#include "lbann/operators/elementwise_operator.hpp"
+#include "lbann/operators/operator.hpp"
+#include "lbann/utils/cloneable.hpp"
+
+#include <operators.pb.h>
+
+// These are all single-type operators. Each macro declares a final class
+// OP_NAME##Operator<DataT, D>; its compute methods are only declared here.
+#define LBANN_DECLARE_STATELESS_OPERATOR(OP_NAME, OP_STRING)                   \
+  template <typename DataT, El::Device D>                                      \
+  class OP_NAME##Operator final                                                \
+    : public Cloneable<OP_NAME##Operator<DataT, D>, Operator<DataT, DataT, D>> \
+  {                                                                            \
+    using BaseType =                                                           \
+      Cloneable<OP_NAME##Operator<DataT, D>, Operator<DataT, DataT, D>>;       \
+    using InputTensorType = typename BaseType::InputTensorType;                \
+    using OutputTensorType = typename BaseType::OutputTensorType;              \
+    using ConstInputTensorType = typename BaseType::ConstInputTensorType;      \
+    using ConstOutputTensorType = typename BaseType::ConstOutputTensorType;    \
+                                                                               \
+  public:                                                                      \
+    OP_NAME##Operator() = default;                                             \
+    OP_NAME##Operator(OP_NAME##Operator&&) = default;                          \
+    OP_NAME##Operator(OP_NAME##Operator const&) = default;                     \
+    OP_NAME##Operator& operator=(OP_NAME##Operator&&) = default;               \
+    OP_NAME##Operator& operator=(OP_NAME##Operator const&) = default;          \
+    ~OP_NAME##Operator() = default;                                            \
+    std::string get_type() const final { return OP_STRING; }                   \
+    template <typename ArchiveT>                                               \
+    void serialize(ArchiveT& ar)                                               \
+    {                                                                          \
+      using OperatorType = Operator<DataT, DataT, D>;                          \
+      ar(::cereal::make_nvp("Operator",                                        \
+                            ::cereal::base_class<OperatorType>(this)));        \
+    }                                                                          \
+    void fp_compute(std::vector<ConstInputTensorType> const& inputs,           \
+                    std::vector<OutputTensorType> const& outputs) const final; \
+    void bp_compute(                                                           \
+      std::vector<ConstInputTensorType> const& inputs,                         \
+      std::vector<ConstOutputTensorType> const& gradient_wrt_outputs,          \
+      std::vector<InputTensorType> const& gradient_wrt_inputs) const final;    \
+                                                                               \
+  private:                                                                     \
+    void set_proto_params(lbann_data::Operator& msg) const final               \
+    {                                                                          \
+      msg.mutable_parameters()->PackFrom(lbann_data::OP_NAME##Operator{});     \
+    }                                                                          \
+    void do_fill_description(description&) const final {}                      \
+  }
+// Elementwise variant: based on ElementwiseOperator<DataT, DataT, D>.
+#define LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(OP_NAME, OP_STRING)       \
+  template <typename DataT, El::Device D>                                      \
+  class OP_NAME##Operator final                                                \
+    : public Cloneable<OP_NAME##Operator<DataT, D>,                            \
+                       ElementwiseOperator<DataT, DataT, D>>                   \
+  {                                                                            \
+    using BaseType = Cloneable<OP_NAME##Operator<DataT, D>,                    \
+                               ElementwiseOperator<DataT, DataT, D>>;          \
+    using LocalInputTensorType = typename BaseType::LocalInputTensorType;      \
+    using LocalOutputTensorType = typename BaseType::LocalOutputTensorType;    \
+    using ConstLocalInputTensorType =                                          \
+      typename BaseType::ConstLocalInputTensorType;                            \
+    using ConstLocalOutputTensorType =                                         \
+      typename BaseType::ConstLocalOutputTensorType;                           \
+                                                                               \
+  public:                                                                      \
+    OP_NAME##Operator() = default;                                             \
+    OP_NAME##Operator(OP_NAME##Operator&&) = default;                          \
+    OP_NAME##Operator(OP_NAME##Operator const&) = default;                     \
+    OP_NAME##Operator& operator=(OP_NAME##Operator&&) = default;               \
+    OP_NAME##Operator& operator=(OP_NAME##Operator const&) = default;          \
+    ~OP_NAME##Operator() = default;                                            \
+    std::string get_type() const final { return OP_STRING; }                   \
+    template <typename ArchiveT>                                               \
+    void serialize(ArchiveT& ar)                                               \
+    {                                                                          \
+      using OperatorType = ElementwiseOperator<DataT, DataT, D>;               \
+      ar(::cereal::make_nvp("ElementwiseOperator",                             \
+                            ::cereal::base_class<OperatorType>(this)));        \
+    }                                                                          \
+                                                                               \
+  private:                                                                     \
+    void                                                                       \
+    fp_compute_local(std::vector<ConstLocalInputTensorType> inputs,            \
+                     std::vector<LocalOutputTensorType> outputs) const final;  \
+    void bp_compute_local(                                                     \
+      std::vector<ConstLocalInputTensorType> inputs,                           \
+      std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,               \
+      std::vector<LocalInputTensorType> grads_wrt_inputs) const final;         \
+    void set_proto_params(lbann_data::Operator& msg) const final               \
+    {                                                                          \
+      msg.mutable_parameters()->PackFrom(lbann_data::OP_NAME##Operator{});     \
+    }                                                                          \
+    void do_fill_description(description&) const final {}                      \
+  }
+
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_DECLARE_STATELESS_OP_HPP_INCLUDED
diff --git a/include/lbann/operators/elementwise_operator.hpp b/include/lbann/operators/elementwise_operator.hpp
index 8388debb6ec..d62836f70cc 100644
--- a/include/lbann/operators/elementwise_operator.hpp
+++ b/include/lbann/operators/elementwise_operator.hpp
@@ -120,10 +120,10 @@ class ElementwiseOperator
    *           tensors, the gradient w.r.t. input tensors are
    *           populated with the computed values.
    */
-  void
-  bp_compute(std::vector<ConstInputTensorType> const& inputs,
-             std::vector<ConstOutputTensorType> const& gradient_wrt_outputs,
-             std::vector<InputTensorType> const& gradient_wrt_inputs) const final
+  void bp_compute(
+    std::vector<ConstInputTensorType> const& inputs,
+    std::vector<ConstOutputTensorType> const& gradient_wrt_outputs,
+    std::vector<InputTensorType> const& gradient_wrt_inputs) const final
   {
     return bp_compute_local(get_local_tensor_views(inputs),
                             get_local_tensor_views(gradient_wrt_outputs),
diff --git a/include/lbann/operators/loss/CMakeLists.txt b/include/lbann/operators/loss/CMakeLists.txt
new file mode 100644
index 00000000000..a86e5df5f33
--- /dev/null
+++ b/include/lbann/operators/loss/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Add the headers for this directory
+set_full_path(THIS_DIR_HEADERS
+  entrywise.hpp
+  loss_builders.hpp
+  loss_builders_impl.hpp
+  )
+
+# Propagate the files up the tree
+set(HEADERS "${HEADERS}" "${THIS_DIR_HEADERS}" PARENT_SCOPE)
diff --git a/src/layers/math/cereal_registration/acosh.cpp b/include/lbann/operators/loss/entrywise.hpp
similarity index 52%
rename from src/layers/math/cereal_registration/acosh.cpp
rename to include/lbann/operators/loss/entrywise.hpp
index f34f24df7bd..1714c12c429 100644
--- a/src/layers/math/cereal_registration/acosh.cpp
+++ b/include/lbann/operators/loss/entrywise.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,28 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
+
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_LOSS_ENTRYWISE_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_LOSS_ENTRYWISE_HPP_INCLUDED
+
+#include "lbann/operators/declare_stateless_op.hpp"
 
 namespace lbann {
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-acosh_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
+// Cross entropy loss
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(BinaryCrossEntropy,
+                                             "binary cross entropy");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(SigmoidBinaryCrossEntropy,
+                                             "sigmoid binary cross entropy");
+
+// Boolean loss functions
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(BooleanAccuracy,
+                                             "Boolean accuracy");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(BooleanFalseNegative,
+                                             "Boolean false negative rate");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(BooleanFalsePositive,
+                                             "Boolean false positive rate");
 
 } // namespace lbann
 
-#define LBANN_LAYER_NAME acosh_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_LOSS_ENTRYWISE_HPP_INCLUDED
diff --git a/src/layers/activations/cereal_registration/log_sigmoid.cpp b/include/lbann/operators/loss/loss_builders.hpp
similarity index 64%
rename from src/layers/activations/cereal_registration/log_sigmoid.cpp
rename to include/lbann/operators/loss/loss_builders.hpp
index 684df579d83..fcd37305a00 100644
--- a/src/layers/activations/cereal_registration/log_sigmoid.cpp
+++ b/include/lbann/operators/loss/loss_builders.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,20 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/activations/activations.hpp>
+
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_LOSS_LOSS_BUILDERS_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_LOSS_LOSS_BUILDERS_HPP_INCLUDED
+
+#include "lbann/operators/builder_macros.hpp"
+#include "lbann/operators/operator.hpp"
 
 namespace lbann {
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-log_sigmoid_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
+LBANN_DECLARE_OPERATOR_BUILDER(binary_cross_entropy);
+LBANN_DECLARE_OPERATOR_BUILDER(boolean_accuracy);
+LBANN_DECLARE_OPERATOR_BUILDER(boolean_false_negative);
+LBANN_DECLARE_OPERATOR_BUILDER(boolean_false_positive);
+LBANN_DECLARE_OPERATOR_BUILDER(sigmoid_binary_cross_entropy);
 
 } // namespace lbann
-
-#define LBANN_LAYER_NAME log_sigmoid_layer
-#include "lbann/macros/register_layer_with_cereal.hpp"
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_LOSS_LOSS_BUILDERS_HPP_INCLUDED
diff --git a/src/layers/math/cereal_registration/acos.cpp b/include/lbann/operators/loss/loss_builders_impl.hpp
similarity index 58%
rename from src/layers/math/cereal_registration/acos.cpp
rename to include/lbann/operators/loss/loss_builders_impl.hpp
index 77f8156cc94..81061d7fc2e 100644
--- a/src/layers/math/cereal_registration/acos.cpp
+++ b/include/lbann/operators/loss/loss_builders_impl.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,17 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_LOSS_LOSS_BUILDERS_IMPL_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_LOSS_LOSS_BUILDERS_IMPL_HPP_INCLUDED
 
-namespace lbann {
+#include "lbann/operators/loss/entrywise.hpp"
+#include "lbann/operators/loss/loss_builders.hpp"
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-acos_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
+LBANN_DEFINE_OPERATOR_BUILDER(binary_cross_entropy, BinaryCrossEntropy)
+LBANN_DEFINE_OPERATOR_BUILDER(boolean_accuracy, BooleanAccuracy)
+LBANN_DEFINE_OPERATOR_BUILDER(boolean_false_negative, BooleanFalseNegative)
+LBANN_DEFINE_OPERATOR_BUILDER(boolean_false_positive, BooleanFalsePositive)
+LBANN_DEFINE_OPERATOR_BUILDER(sigmoid_binary_cross_entropy,
+                              SigmoidBinaryCrossEntropy)
 
-} // namespace lbann
-
-#define LBANN_LAYER_NAME acos_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_LOSS_LOSS_BUILDERS_IMPL_HPP_INCLUDED
diff --git a/include/lbann/operators/math/CMakeLists.txt b/include/lbann/operators/math/CMakeLists.txt
index 990c1340ab2..5fc1e722c71 100644
--- a/include/lbann/operators/math/CMakeLists.txt
+++ b/include/lbann/operators/math/CMakeLists.txt
@@ -1,8 +1,11 @@
 # Add the headers for this directory
 set_full_path(THIS_DIR_HEADERS
+  abs.hpp
+  binary.hpp
   clamp.hpp
   math_builders.hpp
   math_builders_impl.hpp
+  unary.hpp
   )
 
 # Propagate the files up the tree
diff --git a/include/lbann/operators/math/abs.hpp b/include/lbann/operators/math/abs.hpp
new file mode 100644
index 00000000000..34f94fae7ce
--- /dev/null
+++ b/include/lbann/operators/math/abs.hpp
@@ -0,0 +1,129 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef LBANN_OPERATORS_MATH_ABS_HPP_INCLUDED
+#define LBANN_OPERATORS_MATH_ABS_HPP_INCLUDED
+
+#include "lbann_config.hpp"
+
+#include "lbann/operators/elementwise_operator.hpp"
+#include "lbann/proto/datatype_helpers.hpp"
+#include "lbann/utils/cloneable.hpp"
+
+#include <operators.pb.h>
+
+#include <h2/meta/Core.hpp>
+
+#include <google/protobuf/message.h>
+
+namespace lbann {
+
+/** @brief Entrywise absolute value, @f$ \text{abs}(x) = |x| @f$.
+ *
+ *  Input entries have type @c DataT and output entries have type
+ *  @c El::Base<DataT>. The class declares no data members, so only the
+ *  ElementwiseOperator base is archived by serialize().
+ */
+template <typename DataT, El::Device D>
+class AbsOperator final
+  : public Cloneable<AbsOperator<DataT, D>,
+                     ElementwiseOperator<DataT, El::Base<DataT>, D>>
+{
+  /** @name Private Types */
+  ///@{
+
+  using BaseType = Cloneable<AbsOperator<DataT, D>,
+                             ElementwiseOperator<DataT, El::Base<DataT>, D>>;
+
+  using LocalInputTensorType = typename BaseType::LocalInputTensorType;
+  using LocalOutputTensorType = typename BaseType::LocalOutputTensorType;
+  using ConstLocalInputTensorType =
+    typename BaseType::ConstLocalInputTensorType;
+  using ConstLocalOutputTensorType =
+    typename BaseType::ConstLocalOutputTensorType;
+
+  ///@}
+
+public:
+  /** @name Lifecycle */
+  ///@{
+
+  AbsOperator() = default;
+  AbsOperator(AbsOperator&&) = default;
+  AbsOperator(AbsOperator const&) = default;
+  AbsOperator& operator=(AbsOperator&&) = default;
+  AbsOperator& operator=(AbsOperator const&) = default;
+  ~AbsOperator() = default;
+
+  ///@}
+  /** @name Queries */
+  ///@{
+
+  std::string get_type() const final { return "abs"; }
+
+  ///@}
+  /** @name Serialization */
+  ///@{
+
+  template <typename ArchiveT>
+  void serialize(ArchiveT& ar)
+  {
+    using OperatorType = ElementwiseOperator<DataT, El::Base<DataT>, D>;
+    ar(::cereal::make_nvp("ElementwiseOperator",
+                          ::cereal::base_class<OperatorType>(this)));
+  }
+
+  ///@}
+
+private:
+  /** @brief Local forward compute function */
+  void fp_compute_local(std::vector<ConstLocalInputTensorType> input,
+                        std::vector<LocalOutputTensorType> output) const final;
+
+  /** @brief Local backward compute function */
+  void bp_compute_local(
+    std::vector<ConstLocalInputTensorType> input,
+    std::vector<ConstLocalOutputTensorType> gradient_wrt_output,
+    std::vector<LocalInputTensorType> gradient_wrt_input) const final;
+
+  /** @brief Pack a default AbsOperator message; there are no parameters. */
+  void set_proto_params(lbann_data::Operator& msg) const final
+  {
+    msg.mutable_parameters()->PackFrom(lbann_data::AbsOperator{});
+  }
+
+  void do_fill_description(description&) const final {}
+}; // class AbsOperator
+
+#ifndef LBANN_ABS_OPERATOR_INSTANTIATE
+#define PROTO_DEVICE(T, D) extern template class AbsOperator<T, D>
+#include "lbann/macros/instantiate_device.hpp"
+#undef PROTO_DEVICE
+#endif // LBANN_ABS_OPERATOR_INSTANTIATE
+
+} // namespace lbann
+
+#endif // LBANN_OPERATORS_MATH_ABS_HPP_INCLUDED
diff --git a/include/lbann/operators/math/binary.hpp b/include/lbann/operators/math/binary.hpp
new file mode 100644
index 00000000000..be193c95c1f
--- /dev/null
+++ b/include/lbann/operators/math/binary.hpp
@@ -0,0 +1,63 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_MATH_BINARY_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_MATH_BINARY_HPP_INCLUDED
+
+#include "lbann/operators/declare_stateless_op.hpp"
+
+namespace lbann {
+
+// Arithmetic operations
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Add, "add");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Subtract, "subtract");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Multiply, "multiply");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Divide, "divide");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Mod, "modulo");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Pow, "power");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(SafeDivide, "safe divide");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(SquaredDifference,
+                                             "squared difference");
+
+// Comparison operations
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Max, "maximum");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Min, "minimum");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Equal, "equal");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(NotEqual, "not equal");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Less, "less than");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(LessEqual, "less than or equal");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Greater, "greater than");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(GreaterEqual,
+                                             "greater than or equal");
+
+// Logical operations
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(LogicalAnd, "logical and");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(LogicalOr, "logical or");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(LogicalXor, "logical xor");
+
+} // namespace lbann
+
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_MATH_BINARY_HPP_INCLUDED
diff --git a/include/lbann/operators/math/clamp.hpp b/include/lbann/operators/math/clamp.hpp
index 413892d034b..1fa71f7b855 100644
--- a/include/lbann/operators/math/clamp.hpp
+++ b/include/lbann/operators/math/clamp.hpp
@@ -30,15 +30,12 @@
 #include "lbann_config.hpp"
 
 #include "lbann/operators/elementwise_operator.hpp"
-#include "lbann/proto/datatype_helpers.hpp"
 #include "lbann/utils/cloneable.hpp"
 
 #include <operators.pb.h>
 
 #include <h2/meta/Core.hpp>
 
-#include <google/protobuf/message.h>
-
 namespace lbann {
 
 /** @brief Constrain values to a range.
@@ -53,8 +50,9 @@ namespace lbann {
  *  @f]
  */
 template <typename DataT, El::Device D>
-class ClampOperator : public Cloneable<ClampOperator<DataT, D>,
-                                       ElementwiseOperator<DataT, DataT, D>>
+class ClampOperator final
+  : public Cloneable<ClampOperator<DataT, D>,
+                     ElementwiseOperator<DataT, DataT, D>>
 {
 #ifdef LBANN_HAS_GPU_FP16
   using CompareType =
@@ -111,7 +109,7 @@ class ClampOperator : public Cloneable<ClampOperator<DataT, D>,
   void serialize(ArchiveT& ar)
   {
     using OperatorType = ElementwiseOperator<DataT, DataT, D>;
-    ar(::cereal::make_nvp("DataTypeOperator",
+    ar(::cereal::make_nvp("ElementwiseOperator",
                           ::cereal::base_class<OperatorType>(this)),
        CEREAL_NVP(m_min),
        CEREAL_NVP(m_max));
@@ -125,17 +123,16 @@ class ClampOperator : public Cloneable<ClampOperator<DataT, D>,
 
 private:
   /** @brief Local forward compute function */
-  virtual void
-  fp_compute_local(std::vector<ConstLocalInputTensorType> input,
-                   std::vector<LocalOutputTensorType> output) const override;
+  void fp_compute_local(std::vector<ConstLocalInputTensorType> input,
+                        std::vector<LocalOutputTensorType> output) const final;
 
   /** @brief Local backward compute function */
   void bp_compute_local(
     std::vector<ConstLocalInputTensorType> input,
     std::vector<ConstLocalOutputTensorType> gradient_wrt_output,
-    std::vector<LocalInputTensorType> gradient_wrt_input) const override;
+    std::vector<LocalInputTensorType> gradient_wrt_input) const final;
 
-  void set_proto_params(lbann_data::Operator& msg) const override
+  void set_proto_params(lbann_data::Operator& msg) const final
   {
     lbann_data::ClampOperator clamp_msg;
     clamp_msg.set_min(m_min);
@@ -143,7 +140,7 @@ class ClampOperator : public Cloneable<ClampOperator<DataT, D>,
     msg.mutable_parameters()->PackFrom(clamp_msg);
   }
 
-  void do_fill_description(description& desc) const override
+  void do_fill_description(description& desc) const final
   {
     std::ostringstream oss;
     oss << "[" << m_min << "," << m_max << "]";
diff --git a/include/lbann/operators/math/math_builders.hpp b/include/lbann/operators/math/math_builders.hpp
index 873cda58943..7a232d5927f 100644
--- a/include/lbann/operators/math/math_builders.hpp
+++ b/include/lbann/operators/math/math_builders.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,19 +23,68 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_MATH_MATH_BUILDERS_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_MATH_MATH_BUILDERS_HPP_INCLUDED
 
 #include "lbann/operators/operator.hpp"
-#include <memory>
-
-// Forward declaration
-namespace lbann_data {
-class Operator;
-} // namespace lbann_data
+#include "lbann/operators/builder_macros.hpp"
 
 namespace lbann {
 
 template <typename DataT, El::Device D>
-std::unique_ptr<Operator<DataT, DataT, D>>
-build_clamp_operator(lbann_data::Operator const& op);
+std::unique_ptr<Operator<DataT, El::Base<DataT>, D>>
+build_abs_operator(lbann_data::Operator const& op);
+
+LBANN_DECLARE_OPERATOR_BUILDER(acos);
+LBANN_DECLARE_OPERATOR_BUILDER(acosh);
+LBANN_DECLARE_OPERATOR_BUILDER(add);
+LBANN_DECLARE_OPERATOR_BUILDER(asin);
+LBANN_DECLARE_OPERATOR_BUILDER(asinh);
+LBANN_DECLARE_OPERATOR_BUILDER(atan);
+LBANN_DECLARE_OPERATOR_BUILDER(atanh);
+LBANN_DECLARE_OPERATOR_BUILDER(ceil);
+LBANN_DECLARE_OPERATOR_BUILDER(clamp);
+LBANN_DECLARE_OPERATOR_BUILDER(cos);
+LBANN_DECLARE_OPERATOR_BUILDER(cosh);
+LBANN_DECLARE_OPERATOR_BUILDER(divide);
+LBANN_DECLARE_OPERATOR_BUILDER(equal);
+LBANN_DECLARE_OPERATOR_BUILDER(erf);
+LBANN_DECLARE_OPERATOR_BUILDER(erfinv);
+LBANN_DECLARE_OPERATOR_BUILDER(exp);
+LBANN_DECLARE_OPERATOR_BUILDER(expm1);
+LBANN_DECLARE_OPERATOR_BUILDER(floor);
+LBANN_DECLARE_OPERATOR_BUILDER(greater);
+LBANN_DECLARE_OPERATOR_BUILDER(greater_equal);
+LBANN_DECLARE_OPERATOR_BUILDER(less);
+LBANN_DECLARE_OPERATOR_BUILDER(less_equal);
+LBANN_DECLARE_OPERATOR_BUILDER(log);
+LBANN_DECLARE_OPERATOR_BUILDER(log1p);
+LBANN_DECLARE_OPERATOR_BUILDER(logical_and);
+LBANN_DECLARE_OPERATOR_BUILDER(logical_not);
+LBANN_DECLARE_OPERATOR_BUILDER(logical_or);
+LBANN_DECLARE_OPERATOR_BUILDER(logical_xor);
+LBANN_DECLARE_OPERATOR_BUILDER(max);
+LBANN_DECLARE_OPERATOR_BUILDER(min);
+LBANN_DECLARE_OPERATOR_BUILDER(mod);
+LBANN_DECLARE_OPERATOR_BUILDER(multiply);
+LBANN_DECLARE_OPERATOR_BUILDER(negative);
+LBANN_DECLARE_OPERATOR_BUILDER(not_equal);
+LBANN_DECLARE_OPERATOR_BUILDER(pow);
+LBANN_DECLARE_OPERATOR_BUILDER(reciprocal);
+LBANN_DECLARE_OPERATOR_BUILDER(round);
+LBANN_DECLARE_OPERATOR_BUILDER(rsqrt);
+LBANN_DECLARE_OPERATOR_BUILDER(safe_divide);
+LBANN_DECLARE_OPERATOR_BUILDER(safe_reciprocal);
+LBANN_DECLARE_OPERATOR_BUILDER(sign);
+LBANN_DECLARE_OPERATOR_BUILDER(sin);
+LBANN_DECLARE_OPERATOR_BUILDER(sinh);
+LBANN_DECLARE_OPERATOR_BUILDER(sqrt);
+LBANN_DECLARE_OPERATOR_BUILDER(square);
+LBANN_DECLARE_OPERATOR_BUILDER(squared_difference);
+LBANN_DECLARE_OPERATOR_BUILDER(subtract);
+LBANN_DECLARE_OPERATOR_BUILDER(tan);
+LBANN_DECLARE_OPERATOR_BUILDER(tanh);
 
 } // namespace lbann
+
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_MATH_MATH_BUILDERS_HPP_INCLUDED
diff --git a/include/lbann/operators/math/math_builders_impl.hpp b/include/lbann/operators/math/math_builders_impl.hpp
index eca30524bc0..0f69d3417f2 100644
--- a/include/lbann/operators/math/math_builders_impl.hpp
+++ b/include/lbann/operators/math/math_builders_impl.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,26 +23,82 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_MATH_MATH_BUILDERS_IMPL_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_MATH_MATH_BUILDERS_IMPL_HPP_INCLUDED
 
-#include "lbann/operators/math/clamp.hpp"
 #include "lbann/operators/math/math_builders.hpp"
-#include "lbann/operators/operator.hpp"
-#include "lbann/proto/datatype_helpers.hpp"
 
-#include <memory>
+#include "lbann/operators/math/abs.hpp"
+#include "lbann/operators/math/binary.hpp"
+#include "lbann/operators/math/clamp.hpp"
+#include "lbann/operators/math/unary.hpp"
 
-#include <operators.pb.h>
+#include "lbann/proto/datatype_helpers.hpp"
 
 template <typename DataT, El::Device D>
 std::unique_ptr<lbann::Operator<DataT, DataT, D>>
 lbann::build_clamp_operator(lbann_data::Operator const& op)
 {
-  LBANN_ASSERT(proto::ProtoDataType<DataT> == op.input_datatype());
-  LBANN_ASSERT(proto::ProtoDataType<DataT> == op.output_datatype());
-  LBANN_ASSERT(proto::ProtoDevice<D> ==
-               proto::resolve_default_device(op.device_allocation()));
-
+  details::AssertConsistentTypeParameters<DataT, DataT, D>(op);
   lbann_data::ClampOperator params;
   LBANN_ASSERT(op.parameters().UnpackTo(&params));
   return std::make_unique<ClampOperator<DataT, D>>(params.min(), params.max());
 }
+
+template <typename DataT, El::Device D>
+auto lbann::build_abs_operator(lbann_data::Operator const& op)
+  -> std::unique_ptr<lbann::Operator<DataT, El::Base<DataT>, D>>
+{
+  details::AssertConsistentTypeParameters<DataT, El::Base<DataT>, D>(op);
+  return std::make_unique<AbsOperator<DataT, D>>(); // no parameters to unpack
+}
+
+LBANN_DEFINE_OPERATOR_BUILDER(acos, Acos)
+LBANN_DEFINE_OPERATOR_BUILDER(acosh, Acosh)
+LBANN_DEFINE_OPERATOR_BUILDER(add, Add)
+LBANN_DEFINE_OPERATOR_BUILDER(asin, Asin)
+LBANN_DEFINE_OPERATOR_BUILDER(asinh, Asinh)
+LBANN_DEFINE_OPERATOR_BUILDER(atan, Atan)
+LBANN_DEFINE_OPERATOR_BUILDER(atanh, Atanh)
+LBANN_DEFINE_OPERATOR_BUILDER(ceil, Ceil)
+LBANN_DEFINE_OPERATOR_BUILDER(cos, Cos)
+LBANN_DEFINE_OPERATOR_BUILDER(cosh, Cosh)
+LBANN_DEFINE_OPERATOR_BUILDER(divide, Divide)
+LBANN_DEFINE_OPERATOR_BUILDER(equal, Equal)
+LBANN_DEFINE_OPERATOR_BUILDER(erf, Erf)
+LBANN_DEFINE_OPERATOR_BUILDER(erfinv, ErfInv)
+LBANN_DEFINE_OPERATOR_BUILDER(exp, Exp)
+LBANN_DEFINE_OPERATOR_BUILDER(expm1, Expm1)
+LBANN_DEFINE_OPERATOR_BUILDER(floor, Floor)
+LBANN_DEFINE_OPERATOR_BUILDER(greater, Greater)
+LBANN_DEFINE_OPERATOR_BUILDER(greater_equal, GreaterEqual)
+LBANN_DEFINE_OPERATOR_BUILDER(less, Less)
+LBANN_DEFINE_OPERATOR_BUILDER(less_equal, LessEqual)
+LBANN_DEFINE_OPERATOR_BUILDER(log, Log)
+LBANN_DEFINE_OPERATOR_BUILDER(log1p, Log1p)
+LBANN_DEFINE_OPERATOR_BUILDER(logical_and, LogicalAnd)
+LBANN_DEFINE_OPERATOR_BUILDER(logical_not, LogicalNot)
+LBANN_DEFINE_OPERATOR_BUILDER(logical_or, LogicalOr)
+LBANN_DEFINE_OPERATOR_BUILDER(logical_xor, LogicalXor)
+LBANN_DEFINE_OPERATOR_BUILDER(max, Max)
+LBANN_DEFINE_OPERATOR_BUILDER(min, Min)
+LBANN_DEFINE_OPERATOR_BUILDER(mod, Mod)
+LBANN_DEFINE_OPERATOR_BUILDER(multiply, Multiply)
+LBANN_DEFINE_OPERATOR_BUILDER(negative, Negative)
+LBANN_DEFINE_OPERATOR_BUILDER(not_equal, NotEqual)
+LBANN_DEFINE_OPERATOR_BUILDER(pow, Pow)
+LBANN_DEFINE_OPERATOR_BUILDER(reciprocal, Reciprocal)
+LBANN_DEFINE_OPERATOR_BUILDER(round, Round)
+LBANN_DEFINE_OPERATOR_BUILDER(rsqrt, Rsqrt)
+LBANN_DEFINE_OPERATOR_BUILDER(safe_divide, SafeDivide)
+LBANN_DEFINE_OPERATOR_BUILDER(safe_reciprocal, SafeReciprocal)
+LBANN_DEFINE_OPERATOR_BUILDER(sign, Sign)
+LBANN_DEFINE_OPERATOR_BUILDER(sin, Sin)
+LBANN_DEFINE_OPERATOR_BUILDER(sinh, Sinh)
+LBANN_DEFINE_OPERATOR_BUILDER(sqrt, Sqrt)
+LBANN_DEFINE_OPERATOR_BUILDER(square, Square)
+LBANN_DEFINE_OPERATOR_BUILDER(squared_difference, SquaredDifference)
+LBANN_DEFINE_OPERATOR_BUILDER(subtract, Subtract)
+LBANN_DEFINE_OPERATOR_BUILDER(tan, Tan)
+LBANN_DEFINE_OPERATOR_BUILDER(tanh, Tanh)
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_MATH_MATH_BUILDERS_IMPL_HPP_INCLUDED
diff --git a/include/lbann/operators/math/unary.hpp b/include/lbann/operators/math/unary.hpp
new file mode 100644
index 00000000000..3495fc1786e
--- /dev/null
+++ b/include/lbann/operators/math/unary.hpp
@@ -0,0 +1,83 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_MATH_UNARY_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_MATH_UNARY_HPP_INCLUDED
+
+#include "lbann/operators/declare_stateless_op.hpp"
+
+namespace lbann {
+
+// These are all single-type operators: input and output tensors share DataT.
+
+// Logical operations
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(LogicalNot, "logical not");
+
+// Sign operations
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Negative, "negative");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Sign, "sign");
+
+// Rounding operations
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Round, "round");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Ceil, "ceil");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Floor, "floor");
+
+// Power operations
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Reciprocal, "reciprocal");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Square, "square");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Sqrt, "square root");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Rsqrt, "reciprocal square root");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(SafeReciprocal, "safe reciprocal");
+
+// Exponential and logarithmic operations
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Exp, "exponential");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Expm1, "expm1");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Log, "natural logarithm");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Log1p, "log1p");
+
+// Trigonometric operations
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Cos, "cosine");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Sin, "sine");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Tan, "tangent");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Acos, "arccosine");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Asin, "arcsine");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Atan, "arctangent");
+
+// Hyperbolic operations
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Cosh, "hyperbolic cosine");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Sinh, "hyperbolic sine");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Tanh, "hyperbolic tangent");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Acosh, "hyperbolic arccosine");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Asinh, "hyperbolic arcsine");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Atanh, "hyperbolic arctangent");
+
+// Error function
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(Erf, "error function");
+LBANN_DECLARE_STATELESS_ELEMENTWISE_OPERATOR(ErfInv, "inverse error function");
+
+} // namespace lbann
+
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_MATH_UNARY_HPP_INCLUDED
diff --git a/include/lbann/operators/operator.hpp b/include/lbann/operators/operator.hpp
index 7270e14f717..cdc1b5e6867 100644
--- a/include/lbann/operators/operator.hpp
+++ b/include/lbann/operators/operator.hpp
@@ -46,13 +46,6 @@
 #include <string>
 #include <vector>
 
-/** @brief A utility macro for easily adding ETI for operator builders
- *  @note Must be called inside lbann namespace.
- */
-#define LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(OPERATOR_NAME, T, D)            \
-  template std::unique_ptr<Operator<T, T, D>>                                  \
-    build_##OPERATOR_NAME##_operator<T, D>(lbann_data::Operator const&)
-
 namespace lbann {
 
 using supported_operator_data_type = h2::meta::TL<
@@ -63,7 +56,9 @@ using supported_operator_data_type = h2::meta::TL<
   cpu_fp16,
 #endif
   float,
-  double>;
+  double,
+  El::Complex<float>,
+  El::Complex<double>>;
 
 /** @brief Neural network tensor operation.
  *
@@ -132,13 +127,11 @@ class Operator : public AbstractCloneableBase<Operator<InputT, OutputT, D>>,
   ///@{
 
   template <typename ArchiveT>
-  void serialize(ArchiveT& ar){};
+  void serialize(ArchiveT& ar);
 
   ///@}
-
-  // ===========================================================
-  // Forward prop compute function
-  // ===========================================================
+  /** @name Computational interface */
+  ///@{
 
   /** @brief Apply operator's forward operation.
    *  @details Given the input tensors, the output tensors are
@@ -148,10 +141,6 @@ class Operator : public AbstractCloneableBase<Operator<InputT, OutputT, D>>,
   fp_compute(std::vector<ConstInputTensorType> const& inputs,
              std::vector<OutputTensorType> const& outputs) const = 0;
 
-  // ===========================================================
-  // Back prop compute function
-  // ===========================================================
-
   /** @brief Compute operator's "backward" operation
    *  @details Given the inputs, outputs, and gradient w.r.t. output
    *           tensors, the gradient w.r.t. input tensors are
@@ -160,7 +149,8 @@ class Operator : public AbstractCloneableBase<Operator<InputT, OutputT, D>>,
   virtual void
   bp_compute(std::vector<ConstInputTensorType> const& inputs,
              std::vector<ConstOutputTensorType> const& gradient_wrt_outputs,
-             std::vector<InputTensorType> const& gradient_wrt_inputs) const {};
+             std::vector<InputTensorType> const& gradient_wrt_inputs) const;
+  ///@}
 
 protected:
   Operator(Operator&& other) noexcept = default;
@@ -205,5 +195,17 @@ Description Operator<InputT, OutputT, D>::get_description() const
   return desc;
 }
 
+template <typename InputT, typename OutputT, El::Device D>
+void Operator<InputT, OutputT, D>::bp_compute(
+  std::vector<ConstInputTensorType> const&,
+  std::vector<ConstOutputTensorType> const&,
+  std::vector<InputTensorType> const&) const
+{} // Default implementation is a no-op; all arguments are ignored.
+
+template <typename InputT, typename OutputT, El::Device D>
+template <typename ArchiveT>
+void Operator<InputT, OutputT, D>::serialize(ArchiveT& /*ar*/)
+{} // Nothing is archived at this level; the archive is left untouched.
+
 } // namespace lbann
 #endif // LBANN_OPERATORS_OPERATOR_HPP_INCLUDED
diff --git a/include/lbann/proto/datatype_helpers.hpp b/include/lbann/proto/datatype_helpers.hpp
index b0bf7a0ec6e..949589f67b9 100644
--- a/include/lbann/proto/datatype_helpers.hpp
+++ b/include/lbann/proto/datatype_helpers.hpp
@@ -49,6 +49,18 @@ struct TypeToProtoDataType<double>
   static constexpr auto value = lbann_data::DOUBLE;
 };
 
+template <> // El::Complex<float> maps to lbann_data::COMPLEX_FLOAT.
+struct TypeToProtoDataType<El::Complex<float>>
+{
+  static constexpr auto value = lbann_data::COMPLEX_FLOAT;
+};
+
+template <> // El::Complex<double> maps to lbann_data::COMPLEX_DOUBLE.
+struct TypeToProtoDataType<El::Complex<double>>
+{
+  static constexpr auto value = lbann_data::COMPLEX_DOUBLE;
+};
+
 #ifdef LBANN_HAS_HALF
 template <>
 struct TypeToProtoDataType<cpu_fp16>
diff --git a/include/lbann/proto/operator_factory_impl.hpp b/include/lbann/proto/operator_factory_impl.hpp
index 56dd439c4c4..88ab5cb13be 100644
--- a/include/lbann/proto/operator_factory_impl.hpp
+++ b/include/lbann/proto/operator_factory_impl.hpp
@@ -29,6 +29,8 @@
 #include "lbann/proto/factories.hpp"
 #include "lbann/proto/operator_factory.hpp"
 
+#include "lbann/operators/activations/activation_builders.hpp"
+#include "lbann/operators/loss/loss_builders.hpp"
 #include "lbann/operators/math/math_builders.hpp"
 #include "lbann/operators/operator.hpp"
 #include "lbann/proto/datatype_helpers.hpp"
@@ -44,9 +46,77 @@ OperatorFactory<InT, OutT, D> build_default_factory()
 {
   OperatorFactory<InT, OutT, D> factory;
 
+#define LBANN_REGISTER_BUILDER(OP_NAME, OP_LOWER)                              \
+  factory.register_builder(#OP_NAME "Operator",                                \
+                           build_##OP_LOWER##_operator<InT, D>)
+
   if constexpr (std::is_same_v<InT, OutT>) {
-    factory.register_builder("ClampOperator", build_clamp_operator<InT, D>);
+    LBANN_REGISTER_BUILDER(Acos, acos);
+    LBANN_REGISTER_BUILDER(Acosh, acosh);
+    LBANN_REGISTER_BUILDER(Add, add);
+    LBANN_REGISTER_BUILDER(Asin, asin);
+    LBANN_REGISTER_BUILDER(Asinh, asinh);
+    LBANN_REGISTER_BUILDER(Atan, atan);
+    LBANN_REGISTER_BUILDER(Atanh, atanh);
+    LBANN_REGISTER_BUILDER(BinaryCrossEntropy, binary_cross_entropy);
+    LBANN_REGISTER_BUILDER(BooleanAccuracy, boolean_accuracy);
+    LBANN_REGISTER_BUILDER(BooleanFalseNegative, boolean_false_negative);
+    LBANN_REGISTER_BUILDER(BooleanFalsePositive, boolean_false_positive);
+    LBANN_REGISTER_BUILDER(Ceil, ceil);
+    LBANN_REGISTER_BUILDER(Clamp, clamp);
+    LBANN_REGISTER_BUILDER(Cos, cos);
+    LBANN_REGISTER_BUILDER(Cosh, cosh);
+    LBANN_REGISTER_BUILDER(Divide, divide);
+    LBANN_REGISTER_BUILDER(Equal, equal);
+    LBANN_REGISTER_BUILDER(Erf, erf);
+    LBANN_REGISTER_BUILDER(ErfInv, erfinv);
+    LBANN_REGISTER_BUILDER(Exp, exp);
+    LBANN_REGISTER_BUILDER(Expm1, expm1);
+    LBANN_REGISTER_BUILDER(Floor, floor);
+    LBANN_REGISTER_BUILDER(Greater, greater);
+    LBANN_REGISTER_BUILDER(GreaterEqual, greater_equal);
+    LBANN_REGISTER_BUILDER(Less, less);
+    LBANN_REGISTER_BUILDER(LessEqual, less_equal);
+    LBANN_REGISTER_BUILDER(Log, log);
+    LBANN_REGISTER_BUILDER(Log1p, log1p);
+    LBANN_REGISTER_BUILDER(LogSigmoid, log_sigmoid);
+    LBANN_REGISTER_BUILDER(LogicalAnd, logical_and);
+    LBANN_REGISTER_BUILDER(LogicalNot, logical_not);
+    LBANN_REGISTER_BUILDER(LogicalOr, logical_or);
+    LBANN_REGISTER_BUILDER(LogicalXor, logical_xor);
+    LBANN_REGISTER_BUILDER(Max, max);
+    LBANN_REGISTER_BUILDER(Min, min);
+    LBANN_REGISTER_BUILDER(Mod, mod);
+    LBANN_REGISTER_BUILDER(Multiply, multiply);
+    LBANN_REGISTER_BUILDER(Negative, negative);
+    LBANN_REGISTER_BUILDER(NotEqual, not_equal);
+    LBANN_REGISTER_BUILDER(Pow, pow);
+    LBANN_REGISTER_BUILDER(Reciprocal, reciprocal);
+    LBANN_REGISTER_BUILDER(Round, round);
+    LBANN_REGISTER_BUILDER(Rsqrt, rsqrt);
+    LBANN_REGISTER_BUILDER(SafeDivide, safe_divide);
+    LBANN_REGISTER_BUILDER(SafeReciprocal, safe_reciprocal);
+    LBANN_REGISTER_BUILDER(Selu, selu);
+    LBANN_REGISTER_BUILDER(Sigmoid, sigmoid);
+    LBANN_REGISTER_BUILDER(SigmoidBinaryCrossEntropy,
+                           sigmoid_binary_cross_entropy);
+    LBANN_REGISTER_BUILDER(Sign, sign);
+    LBANN_REGISTER_BUILDER(Sin, sin);
+    LBANN_REGISTER_BUILDER(Sinh, sinh);
+    LBANN_REGISTER_BUILDER(Softplus, softplus);
+    LBANN_REGISTER_BUILDER(Softsign, softsign);
+    LBANN_REGISTER_BUILDER(Sqrt, sqrt);
+    LBANN_REGISTER_BUILDER(Square, square);
+    LBANN_REGISTER_BUILDER(SquaredDifference, squared_difference);
+    LBANN_REGISTER_BUILDER(Subtract, subtract);
+    LBANN_REGISTER_BUILDER(Tan, tan);
+    LBANN_REGISTER_BUILDER(Tanh, tanh);
+  }
+
+  if constexpr (std::is_same_v<OutT, El::Base<InT>>) {
+    LBANN_REGISTER_BUILDER(Abs, abs);
   }
+#undef LBANN_REGISTER_BUILDER // Helper macro must not leak to includers.
   return factory;
 }
 
diff --git a/include/lbann/utils/exception.hpp b/include/lbann/utils/exception.hpp
index c74f4eaea36..5c664bea89a 100644
--- a/include/lbann/utils/exception.hpp
+++ b/include/lbann/utils/exception.hpp
@@ -78,6 +78,12 @@
   if (!(cond))                                          \
     LBANN_ERROR("The assertion " #cond " failed.")
 
+#ifdef LBANN_DEBUG // Statement-safe wrapper: checks only in debug builds.
+#define LBANN_ASSERT_DEBUG(cond) do { LBANN_ASSERT(cond); } while (0)
+#else // Release: same statement shape, but cond is never evaluated.
+#define LBANN_ASSERT_DEBUG(cond) do { } while (0)
+#endif
+
 #define LBANN_ASSERT_WARNING(cond)                      \
   if (!(cond))                                          \
     LBANN_WARNING("The assertion " #cond " failed.")
diff --git a/include/lbann/utils/tensor.hpp b/include/lbann/utils/tensor.hpp
index 22cef53b582..bab2330233c 100644
--- a/include/lbann/utils/tensor.hpp
+++ b/include/lbann/utils/tensor.hpp
@@ -101,7 +101,8 @@ std::vector<size_t> verify_and_get_dims(MatrixT const& A,
   auto const A_width = A.Width();
   if (dims[0] == El::To<size_t>(A_width)) {
 #ifdef LBANN_DEBUG
-    LBANN_ASSERT(get_linear_size(dims) == A_height * A_width);
+    LBANN_ASSERT(get_linear_size(dims) ==
+                 static_cast<size_t>(A_height * A_width));
 #endif
     return dims;
   }
@@ -282,7 +283,8 @@ template <typename T, El::Device D>
 class ConstDistTensorView
   : public details::MatrixAsTensorView<El::AbstractDistMatrix<T> const>
 {
-  using base_type = details::MatrixAsTensorView<El::AbstractDistMatrix<T> const>;
+  using base_type =
+    details::MatrixAsTensorView<El::AbstractDistMatrix<T> const>;
 
 public:
   template <typename MatT>
diff --git a/python/lbann/core/operators.py b/python/lbann/core/operators.py
index badb9a211d9..e21f92c23eb 100644
--- a/python/lbann/core/operators.py
+++ b/python/lbann/core/operators.py
@@ -64,3 +64,534 @@ def do_export_proto(self):
         params.min = self.min
         params.max = self.max
         return params
+
+class Abs(Operator):
+    """Apply the Abs operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.AbsOperator``."""
+        return OpProto.AbsOperator()
+
+class Acos(Operator):
+    """Apply the Acos operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.AcosOperator``."""
+        return OpProto.AcosOperator()
+
+class Acosh(Operator):
+    """Apply the Acosh operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.AcoshOperator``."""
+        return OpProto.AcoshOperator()
+
+class Add(Operator):
+    """Apply the Add operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.AddOperator``."""
+        return OpProto.AddOperator()
+
+class Asin(Operator):
+    """Apply the Asin operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.AsinOperator``."""
+        return OpProto.AsinOperator()
+
+class Asinh(Operator):
+    """Apply the Asinh operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.AsinhOperator``."""
+        return OpProto.AsinhOperator()
+
+class Atan(Operator):
+    """Apply the Atan operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.AtanOperator``."""
+        return OpProto.AtanOperator()
+
+class Atanh(Operator):
+    """Apply the Atanh operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.AtanhOperator``."""
+        return OpProto.AtanhOperator()
+
+class BinaryCrossEntropy(Operator):
+    """Apply the BinaryCrossEntropy operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.BinaryCrossEntropyOperator``."""
+        return OpProto.BinaryCrossEntropyOperator()
+
+class BooleanAccuracy(Operator):
+    """Apply the BooleanAccuracy operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.BooleanAccuracyOperator``."""
+        return OpProto.BooleanAccuracyOperator()
+
+class BooleanFalseNegative(Operator):
+    """Apply the BooleanFalseNegative operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.BooleanFalseNegativeOperator``."""
+        return OpProto.BooleanFalseNegativeOperator()
+
+class BooleanFalsePositive(Operator):
+    """Apply the BooleanFalsePositive operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.BooleanFalsePositiveOperator``."""
+        return OpProto.BooleanFalsePositiveOperator()
+
+class Ceil(Operator):
+    """Apply the Ceil operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.CeilOperator``."""
+        return OpProto.CeilOperator()
+
+class Cos(Operator):
+    """Apply the Cos operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.CosOperator``."""
+        return OpProto.CosOperator()
+
+class Cosh(Operator):
+    """Apply the Cosh operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.CoshOperator``."""
+        return OpProto.CoshOperator()
+
+class Divide(Operator):
+    """Apply the Divide operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.DivideOperator``."""
+        return OpProto.DivideOperator()
+
+class Equal(Operator):
+    """Apply the Equal operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.EqualOperator``."""
+        return OpProto.EqualOperator()
+
+class Erf(Operator):
+    """Apply the Erf operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.ErfOperator``."""
+        return OpProto.ErfOperator()
+
+class ErfInv(Operator):
+    """Apply the ErfInv operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.ErfInvOperator``."""
+        return OpProto.ErfInvOperator()
+
+class Exp(Operator):
+    """Apply the Exp operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.ExpOperator``."""
+        return OpProto.ExpOperator()
+
+class Expm1(Operator):
+    """Apply the Expm1 operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.Expm1Operator``."""
+        return OpProto.Expm1Operator()
+
+class Floor(Operator):
+    """Apply the Floor operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.FloorOperator``."""
+        return OpProto.FloorOperator()
+
+class Greater(Operator):
+    """Apply the Greater operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.GreaterOperator``."""
+        return OpProto.GreaterOperator()
+
+class GreaterEqual(Operator):
+    """Apply the GreaterEqual operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.GreaterEqualOperator``."""
+        return OpProto.GreaterEqualOperator()
+
+class Less(Operator):
+    """Apply the Less operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.LessOperator``."""
+        return OpProto.LessOperator()
+
+class LessEqual(Operator):
+    """Apply the LessEqual operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.LessEqualOperator``."""
+        return OpProto.LessEqualOperator()
+
+class Log(Operator):
+    """Apply the Log operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.LogOperator``."""
+        return OpProto.LogOperator()
+
+class Log1p(Operator):
+    """Apply the Log1p operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.Log1pOperator``."""
+        return OpProto.Log1pOperator()
+
+class LogSigmoid(Operator):
+    """Apply the LogSigmoid operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.LogSigmoidOperator``."""
+        return OpProto.LogSigmoidOperator()
+
+class LogicalAnd(Operator):
+    """Apply the LogicalAnd operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.LogicalAndOperator``."""
+        return OpProto.LogicalAndOperator()
+
+class LogicalNot(Operator):
+    """Apply the LogicalNot operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.LogicalNotOperator``."""
+        return OpProto.LogicalNotOperator()
+
+class LogicalOr(Operator):
+    """Apply the LogicalOr operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.LogicalOrOperator``."""
+        return OpProto.LogicalOrOperator()
+
+class LogicalXor(Operator):
+    """Apply the LogicalXor operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.LogicalXorOperator``."""
+        return OpProto.LogicalXorOperator()
+
+class Max(Operator):
+    """Apply the Max operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.MaxOperator``."""
+        return OpProto.MaxOperator()
+
+class Min(Operator):
+    """Apply the Min operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.MinOperator``."""
+        return OpProto.MinOperator()
+
+class Mod(Operator):
+    """Apply the Mod operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.ModOperator``."""
+        return OpProto.ModOperator()
+
+class Multiply(Operator):
+    """Apply the Multiply operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.MultiplyOperator``."""
+        return OpProto.MultiplyOperator()
+
+class Negative(Operator):
+    """Apply the Negative operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.NegativeOperator``."""
+        return OpProto.NegativeOperator()
+
+class NotEqual(Operator):
+    """Apply the NotEqual operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.NotEqualOperator``."""
+        return OpProto.NotEqualOperator()
+
+class Pow(Operator):
+    """Apply the Pow operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.PowOperator``."""
+        return OpProto.PowOperator()
+
+class Reciprocal(Operator):
+    """Apply the Reciprocal operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.ReciprocalOperator``."""
+        return OpProto.ReciprocalOperator()
+
+class Round(Operator):
+    """Apply the Round operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.RoundOperator``."""
+        return OpProto.RoundOperator()
+
+class Rsqrt(Operator):
+    """Apply the Rsqrt operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.RsqrtOperator``."""
+        return OpProto.RsqrtOperator()
+
+class SafeDivide(Operator):
+    """Apply the SafeDivide operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SafeDivideOperator``."""
+        return OpProto.SafeDivideOperator()
+
+class SafeReciprocal(Operator):
+    """Apply the SafeReciprocal operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SafeReciprocalOperator``."""
+        return OpProto.SafeReciprocalOperator()
+
+class Selu(Operator):
+    """Apply the Selu operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SeluOperator``."""
+        return OpProto.SeluOperator()
+
+class Sigmoid(Operator):
+    """Apply the Sigmoid operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SigmoidOperator``."""
+        return OpProto.SigmoidOperator()
+
+class SigmoidBinaryCrossEntropy(Operator):
+    """Apply the SigmoidBinaryCrossEntropy operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SigmoidBinaryCrossEntropyOperator``."""
+        return OpProto.SigmoidBinaryCrossEntropyOperator()
+
+class Sign(Operator):
+    """Apply the Sign operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SignOperator``."""
+        return OpProto.SignOperator()
+
+class Sin(Operator):
+    """Apply the Sin operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SinOperator``."""
+        return OpProto.SinOperator()
+
+class Sinh(Operator):
+    """Apply the Sinh operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SinhOperator``."""
+        return OpProto.SinhOperator()
+
+class Softplus(Operator):
+    """Apply the Softplus operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SoftplusOperator``."""
+        return OpProto.SoftplusOperator()
+
+class Softsign(Operator):
+    """Apply the Softsign operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SoftsignOperator``."""
+        return OpProto.SoftsignOperator()
+
+class Sqrt(Operator):
+    """Apply the Sqrt operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SqrtOperator``."""
+        return OpProto.SqrtOperator()
+
+class Square(Operator):
+    """Apply the Square operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SquareOperator``."""
+        return OpProto.SquareOperator()
+
+class SquaredDifference(Operator):
+    """Apply the SquaredDifference operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SquaredDifferenceOperator``."""
+        return OpProto.SquaredDifferenceOperator()
+
+class Subtract(Operator):
+    """Apply the Subtract operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.SubtractOperator``."""
+        return OpProto.SubtractOperator()
+
+class Tan(Operator):
+    """Apply the Tan operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.TanOperator``."""
+        return OpProto.TanOperator()
+
+class Tanh(Operator):
+    """Apply the Tanh operator entrywise.
+
+    Adds no parameters of its own; construction is inherited from Operator.
+    """
+    def do_export_proto(self):
+        """Return a new ``OpProto.TanhOperator``."""
+        return OpProto.TanhOperator()
diff --git a/src/execution_algorithms/ltfb/mutation_strategy.cpp b/src/execution_algorithms/ltfb/mutation_strategy.cpp
index afa29753652..7f5e3997cb2 100644
--- a/src/execution_algorithms/ltfb/mutation_strategy.cpp
+++ b/src/execution_algorithms/ltfb/mutation_strategy.cpp
@@ -28,18 +28,23 @@
 
 #include "lbann/comm_impl.hpp"
 
-#include "lbann/layers/activations/activations.hpp"
 #include "lbann/layers/activations/elu.hpp"
 #include "lbann/layers/activations/leaky_relu.hpp"
 #include "lbann/layers/activations/log_softmax.hpp"
 #include "lbann/layers/activations/relu.hpp"
 #include "lbann/layers/activations/softmax.hpp"
-#include "lbann/layers/math/unary.hpp"
+#include "lbann/layers/operator_layer.hpp"
+
+#include "lbann/operators/activations/activations.hpp" // SigmoidOperator
+#include "lbann/operators/math/unary.hpp"              // TanhOperator
 
 #include "lbann/layers/learning/convolution.hpp"
 
 #include "lbann/models/model.hpp"
+#include "lbann/operators/math/unary.hpp"
 #include "lbann/utils/random.hpp"
+#include "lbann_config.hpp"
+#include <memory>
 
 #ifdef LBANN_HAS_GPU
 constexpr El::Device Dev = El::Device::GPU;
@@ -70,9 +75,10 @@ make_new_activation_layer(lbann_comm& comm,
         &comm);
   }
   else if (new_type == "tanh") {
-    layer =
-      std::make_unique<tanh_layer<DataType, data_layout::DATA_PARALLEL, Dev>>(
-        &comm);
+    layer = std::make_unique<
+      OperatorLayer<DataType, DataType, data_layout::DATA_PARALLEL, Dev>>(
+      comm,
+      std::make_unique<TanhOperator<DataType, Dev>>());
   }
   else if (new_type == "softmax") {
     layer = std::make_unique<
@@ -96,7 +102,9 @@ make_new_activation_layer(lbann_comm& comm,
   }
   else if (new_type == "sigmoid") {
     layer = std::make_unique<
-      sigmoid_layer<DataType, data_layout::DATA_PARALLEL, Dev>>(&comm);
+      OperatorLayer<DataType, DataType, data_layout::DATA_PARALLEL, Dev>>(
+      comm,
+      std::make_unique<SigmoidOperator<DataType, Dev>>());
   }
   else {
     LBANN_ERROR("Unknown new layer type: ", new_type);
diff --git a/src/layers/activations/CMakeLists.txt b/src/layers/activations/CMakeLists.txt
index 6bb4bc14beb..fc5492afc70 100644
--- a/src/layers/activations/CMakeLists.txt
+++ b/src/layers/activations/CMakeLists.txt
@@ -1,6 +1,5 @@
 # Add the source files for this directory
 set_full_path(THIS_DIR_SOURCES
-  activations.cpp
   elu.cpp
   identity.cpp
   relu.cpp
@@ -13,7 +12,6 @@ set_full_path(THIS_DIR_SOURCES
 if (LBANN_HAS_GPU)
   # Add the CUDA source files for this directory
   set_full_path(THIS_DIR_CU_SOURCES
-    activations.cu
     elu.cu
     relu.cu
     leaky_relu.cu
diff --git a/src/layers/activations/activations.cpp b/src/layers/activations/activations.cpp
deleted file mode 100644
index f461355b933..00000000000
--- a/src/layers/activations/activations.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#define LBANN_ACTIVATIONS_LAYER_INSTANTIATE
-#include "lbann/layers/activations/activations.hpp"
-#include "lbann/utils/entrywise_operator.hpp"
-
-namespace lbann {
-
-namespace {
-
-// =========================================================
-// Operator objects for entry-wise unary layers
-// =========================================================
-// Note: Unary operator corresponds to forward prop step
-// (\f$ y = f(x) \f$) and binary operator corresponds to
-// back prop step
-// (\f$ \frac{dL}{dx} = \frac{dL}{dy} f'(x) \f$).
-
-/** Log sigmoid operator. */
-template <typename TensorDataType>
-struct log_sigmoid_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    using std::log1p;
-    if (x >= El::TypeTraits<TensorDataType>::Zero()) {
-      return -log1p(El::Exp(-x));
-    } else {
-      return x - log1p(El::Exp(x));
-    }
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (El::TypeTraits<TensorDataType>::One() + El::Exp(x));
-  }
-};
-
-/** SELU operator. */
-template <typename TensorDataType>
-struct selu_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    using std::expm1;
-    static const auto alpha = TensorDataType(1.6732632423543772848170429916717);
-    static const auto scale = TensorDataType(1.0507009873554804934193349852946);
-    static const auto zero = TensorDataType(0.);
-    return (x > zero ?
-            scale * x :
-            scale * alpha * expm1(x));
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    static const auto alpha = TensorDataType(1.6732632423543772848170429916717);
-    static const auto scale = TensorDataType(1.0507009873554804934193349852946);
-    static const auto zero = TensorDataType(0.);
-    return (x > zero ?
-            dy * scale :
-            dy * scale * alpha * El::Exp(x));
-  }
-};
-
-/** Sigmoid operator. */
-template <typename TensorDataType>
-struct sigmoid_op {
-  TensorDataType eps = std::numeric_limits<TensorDataType>::epsilon();
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    static const auto one = El::TypeTraits<TensorDataType>::One();
-    const auto& y = one / (one + El::Exp(-x));
-#ifdef LBANN_ENABLE_SIGMOID_CUTOFF
-    if (y <= eps)            { return eps; }
-    else if (y >= one - eps) { return one - eps; }
-#endif // LBANN_ENABLE_SIGMOID_CUTOFF
-    return y;
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    static const auto one = El::TypeTraits<TensorDataType>::One();
-    const auto& y = one / (one + El::Exp(-x));
-#ifdef LBANN_ENABLE_SIGMOID_CUTOFF
-    if (y <= eps || y >= one - eps) { return El::TypeTraits<TensorDataType>::Zero(); }
-#endif // LBANN_ENABLE_SIGMOID_CUTOFF
-    return dy * y * (one - y);
-  }
-};
-
-/** Softplus operator. */
-template <typename TensorDataType>
-struct softplus_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    using std::log1p;
-    if (x > El::TypeTraits<TensorDataType>::Zero()) {
-      return log1p(El::Exp(-x)) + x;
-    } else {
-      return log1p(El::Exp(x));
-    }
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (El::TypeTraits<TensorDataType>::One() + El::Exp(-x));
-  }
-};
-
-/** Softsign operator. */
-template <typename TensorDataType>
-struct softsign_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    using std::fabs;
-    return x / (El::TypeTraits<TensorDataType>::One() + fabs(x));
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    using std::fabs;
-    const auto& denom = El::TypeTraits<TensorDataType>::One() + fabs(x);
-    return dy / (denom * denom);
-  }
-};
-
-} // namespace
-
-// Template instantiation
-#define DEFINE_COMPUTE_OPS(layer, op)                                   \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::fp_compute() {            \
-    apply_entrywise_unary_operator<op>(                                 \
-      this->get_prev_activations(),                                     \
-      this->get_activations());                                         \
-  }                                                                     \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::bp_compute() {            \
-    apply_entrywise_binary_operator<op>(                                \
-      this->get_prev_activations(),                                     \
-      this->get_prev_error_signals(),                                   \
-      this->get_error_signals());                                       \
-  }
-
-DEFINE_COMPUTE_OPS(log_sigmoid_layer, log_sigmoid_op)
-DEFINE_COMPUTE_OPS(selu_layer, selu_op)
-DEFINE_COMPUTE_OPS(sigmoid_layer, sigmoid_op)
-DEFINE_COMPUTE_OPS(softplus_layer, softplus_op)
-DEFINE_COMPUTE_OPS(softsign_layer, softsign_op)
-
-#define PROTO(T) \
-  UNARY_ETI_INST_MACRO_DEV_DT(log_sigmoid_layer, T, El::Device::CPU); \
-  UNARY_ETI_INST_MACRO_DEV_DT(selu_layer, T, El::Device::CPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(sigmoid_layer, T, El::Device::CPU);     \
-  UNARY_ETI_INST_MACRO_DEV_DT(softplus_layer, T, El::Device::CPU);    \
-  UNARY_ETI_INST_MACRO_DEV_DT(softsign_layer, T, El::Device::CPU)
-
-#define LBANN_INSTANTIATE_CPU_HALF
-#include "lbann/macros/instantiate.hpp"
-
-} // namespace lbann
diff --git a/src/layers/activations/activations.cu b/src/layers/activations/activations.cu
deleted file mode 100644
index 3b17fd1bbc7..00000000000
--- a/src/layers/activations/activations.cu
+++ /dev/null
@@ -1,173 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#define LBANN_ACTIVATIONS_LAYER_INSTANTIATE
-#include "lbann/layers/activations/activations.hpp"
-#include "lbann/utils/gpu/helpers.hpp"
-
-namespace lbann {
-
-namespace {
-
-// =========================================================
-// Operator objects for entry-wise unary layers
-// =========================================================
-// Note: Unary operator corresponds to forward prop step
-// (\f$ y = f(x) \f$) and binary operator corresponds to
-// back prop step
-// (\f$ \frac{dL}{dx} = \frac{dL}{dy} f'(x) \f$).
-
-/** Log sigmoid operator. */
-template <typename TensorDataType>
-struct log_sigmoid_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    if (x >= TensorDataType(0.0)) {
-      return -gpu_lib::log1p(gpu_lib::exp(-x));
-    } else {
-      return x - gpu_lib::log1p(gpu_lib::exp(x));
-    }
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (TensorDataType(1.0) + gpu_lib::exp(x));
-  }
-};
-
-/** ReLU operator. */
-template <typename TensorDataType>
-struct relu_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::max(x, TensorDataType(0.0));
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return x > TensorDataType(0.0) ? dy : TensorDataType(0.0);
-  }
-};
-
-/** SELU operator. */
-template <typename TensorDataType>
-struct selu_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    const TensorDataType alpha = 1.6732632423543772848170429916717;
-    const TensorDataType scale = 1.0507009873554804934193349852946;
-    return (x > TensorDataType(0.0) ?
-            scale * x :
-            scale * alpha * gpu_lib::expm1(x));
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const TensorDataType alpha = 1.6732632423543772848170429916717;
-    const TensorDataType scale = 1.0507009873554804934193349852946;
-    return (x > TensorDataType(0.0) ?
-            dy * scale :
-            dy * scale * alpha * gpu_lib::exp(x));
-  }
-};
-
-/** Sigmoid operator. */
-template <typename TensorDataType>
-struct sigmoid_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    const TensorDataType one = 1.;
-    const auto& y = one / (one + gpu_lib::exp(-x));
-#ifdef LBANN_ENABLE_SIGMOID_CUTOFF
-    const auto eps = gpu_lib::epsilon<TensorDataType>();
-    if (y <= eps) { return eps; }
-    else if (y >= one - eps) { return one - eps; }
-#endif // LBANN_ENABLE_SIGMOID_CUTOFF
-    return y;
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const TensorDataType one = 1.;
-    const auto& y = one / (one + gpu_lib::exp(-x));
-#ifdef LBANN_ENABLE_SIGMOID_CUTOFF
-    const auto eps = gpu_lib::epsilon<TensorDataType>();
-    if (y <= eps || y >= one - eps) { return TensorDataType(0.0); }
-#endif // LBANN_ENABLE_SIGMOID_CUTOFF
-    return dy * y * (one - y);
-  }
-};
-
-/** Softplus operator. */
-template <typename TensorDataType>
-struct softplus_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    if (x > TensorDataType(0.0)) {
-      return gpu_lib::log1p(gpu_lib::exp(-x)) + x;
-    } else {
-      return gpu_lib::log1p(gpu_lib::exp(x));
-    }
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (TensorDataType(1.0) + gpu_lib::exp(-x));
-  }
-};
-
-/** Softsign operator. */
-template <typename TensorDataType>
-struct softsign_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return x / (TensorDataType(1.0) + gpu_lib::abs(x));
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const auto& denom = TensorDataType(1.0) + gpu_lib::abs(x);
-    return dy / (denom * denom);
-  }
-};
-
-} // namespace
-
-// Template instantiation
-#define DEFINE_COMPUTE_OPS(layer, op)                                   \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::fp_compute() {            \
-    gpu_lib::apply_entrywise_unary_operator<op>(                        \
-      this->get_prev_activations(),                                     \
-      this->get_activations());                                         \
-  }                                                                     \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::bp_compute() {            \
-    gpu_lib::apply_entrywise_binary_operator<op>(                       \
-      this->get_prev_activations(),                                     \
-      this->get_prev_error_signals(),                                   \
-      this->get_error_signals());                                       \
-  }
-
-DEFINE_COMPUTE_OPS(log_sigmoid_layer, log_sigmoid_op)
-DEFINE_COMPUTE_OPS(selu_layer, selu_op)
-DEFINE_COMPUTE_OPS(sigmoid_layer, sigmoid_op)
-DEFINE_COMPUTE_OPS(softplus_layer, softplus_op)
-DEFINE_COMPUTE_OPS(softsign_layer, softsign_op)
-
-#define PROTO(T) \
-  UNARY_ETI_INST_MACRO_DEV_DT(log_sigmoid_layer, T, El::Device::GPU); \
-  UNARY_ETI_INST_MACRO_DEV_DT(selu_layer, T, El::Device::GPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(sigmoid_layer, T, El::Device::GPU);     \
-  UNARY_ETI_INST_MACRO_DEV_DT(softplus_layer, T, El::Device::GPU);    \
-  UNARY_ETI_INST_MACRO_DEV_DT(softsign_layer, T, El::Device::GPU)
-
-#define LBANN_INSTANTIATE_GPU_HALF
-#include "lbann/macros/instantiate.hpp"
-
-} // namespace lbann
diff --git a/src/layers/activations/cereal_registration/CMakeLists.txt b/src/layers/activations/cereal_registration/CMakeLists.txt
index 30bf1df37d4..8d6fdfbd5bd 100644
--- a/src/layers/activations/cereal_registration/CMakeLists.txt
+++ b/src/layers/activations/cereal_registration/CMakeLists.txt
@@ -3,14 +3,9 @@ set_full_path(THIS_DIR_SOURCES
   elu.cpp
   identity.cpp
   leaky_relu.cpp
-  log_sigmoid.cpp
   log_softmax.cpp
   relu.cpp
-  selu.cpp
-  sigmoid.cpp
   softmax.cpp
-  softplus.cpp
-  softsign.cpp
   )
 
 # Propagate the files up the tree
diff --git a/src/layers/activations/cereal_registration/selu.cpp b/src/layers/activations/cereal_registration/selu.cpp
deleted file mode 100644
index f26e6b99aa1..00000000000
--- a/src/layers/activations/cereal_registration/selu.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/activations/activations.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-selu_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME selu_layer
-#include "lbann/macros/register_layer_with_cereal.hpp"
diff --git a/src/layers/activations/cereal_registration/sigmoid.cpp b/src/layers/activations/cereal_registration/sigmoid.cpp
deleted file mode 100644
index 6575c4f5a5f..00000000000
--- a/src/layers/activations/cereal_registration/sigmoid.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/activations/activations.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-sigmoid_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME sigmoid_layer
-#include "lbann/macros/register_layer_with_cereal.hpp"
diff --git a/src/layers/loss/CMakeLists.txt b/src/layers/loss/CMakeLists.txt
index 6df4694b5fc..2e8bc59dd63 100644
--- a/src/layers/loss/CMakeLists.txt
+++ b/src/layers/loss/CMakeLists.txt
@@ -2,7 +2,6 @@
 set_full_path(THIS_DIR_SOURCES
   categorical_accuracy.cpp
   cross_entropy.cpp
-  entrywise.cpp
   l1_norm.cpp
   l2_norm2.cpp
   mean_absolute_error.cpp
@@ -15,7 +14,6 @@ if (LBANN_HAS_GPU)
   set_full_path(THIS_DIR_CU_SOURCES
     categorical_accuracy.cu
     cross_entropy.cu
-    entrywise.cu
     l1_norm.cu
     l2_norm2.cu
     mean_absolute_error.cu
diff --git a/src/layers/loss/cereal_registration/CMakeLists.txt b/src/layers/loss/cereal_registration/CMakeLists.txt
index 6029f110644..5fdecbabd21 100644
--- a/src/layers/loss/cereal_registration/CMakeLists.txt
+++ b/src/layers/loss/cereal_registration/CMakeLists.txt
@@ -1,16 +1,11 @@
 # Add the source files for this directory
 set_full_path(THIS_DIR_SOURCES
-  binary_cross_entropy.cpp
-  boolean_accuracy.cpp
-  boolean_false_negative.cpp
-  boolean_false_positive.cpp
   categorical_accuracy.cpp
   cross_entropy.cpp
   l1_norm.cpp
   l2_norm2.cpp
   mean_absolute_error.cpp
   mean_squared_error.cpp
-  sigmoid_binary_cross_entropy.cpp
   top_k_categorical_accuracy.cpp
   )
 
diff --git a/src/layers/loss/entrywise.cpp b/src/layers/loss/entrywise.cpp
deleted file mode 100644
index 1fabcf24c14..00000000000
--- a/src/layers/loss/entrywise.cpp
+++ /dev/null
@@ -1,255 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#define LBANN_ENTRYWISE_LAYER_INSTANTIATE
-#include "lbann/layers/loss/entrywise.hpp"
-#include "lbann/utils/entrywise_operator.hpp"
-#include "lbann/utils/numerical_traits.hpp"
-
-namespace lbann {
-
-namespace {
-
-/** Apply a binary backprop operator to CPU data.
- *  The input and output data must be on CPU and must have the same
- *  dimensions. Given a binary function \f$ y = f(x_1,x_2) \f$, the
- *  corresponding BinaryBackPropOperator is a 5-ary function with the
- *  arguments \f$ x_1 \f$, \f$ x_2 \f$, \f$ dL/dy \f$, \f$ dL/dx_1\f$,
- *  \f$ dL/dx_2 \f$. The last two arguments should be overwritten when
- *  the BinaryBackPropOperator is called.
- */
-template <template <typename> class Op, typename TensorDataType>
-void apply_binary_backprop_operator(const El::AbstractMatrix<TensorDataType>& x1,
-                                    const El::AbstractMatrix<TensorDataType>& x2,
-                                    const El::AbstractMatrix<TensorDataType>& dy,
-                                    El::AbstractMatrix<TensorDataType>& dx1,
-                                    El::AbstractMatrix<TensorDataType>& dx2) {
-  using BinaryBackPropOperator = Op<TensorDataType>;
-  if (x1.Contiguous() && x2.Contiguous() && dy.Contiguous()
-      && dx1.Contiguous() && dx2.Contiguous()) {
-    const auto* x1_buffer = x1.LockedBuffer();
-    const auto* x2_buffer = x2.LockedBuffer();
-    const auto* dy_buffer = dy.LockedBuffer();
-    auto* dx1_buffer = dx1.Buffer();
-    auto* dx2_buffer = dx2.Buffer();
-    const size_t size = x1.Height() * x1.Width();
-    LBANN_OMP_PARALLEL_FOR
-    for (size_t i = 0; i < size; ++i) {
-      BinaryBackPropOperator op;
-      op(x1_buffer[i], x2_buffer[i], dy_buffer[i],
-         dx1_buffer[i], dx2_buffer[i]);
-    }
-  } else {
-    auto const width = x1.Width();
-    auto const height = x1.Height();
-    LBANN_OMP_PARALLEL_FOR_COLLAPSE2
-    for (El::Int col = 0; col < width; ++col) {
-      for (El::Int row = 0; row < height; ++row) {
-        BinaryBackPropOperator op;
-        op(x1(row, col), x2(row, col), dy(row, col),
-           dx1(row, col), dx2(row, col));
-      }
-    }
-  }
-}
-
-// =========================================================
-// Operator objects for entry-wise binary layers
-// =========================================================
-// Note: Binary operator corresponds to forward prop step
-// (\f$ y = f(x_1,x_2) \f$) and 5-ary operator corresponds
-// to back prop step
-// (\f$ \frac{dL}{dx_i} = \frac{dL}{dy} \frac{df}{dx_i}(x_1,x_2) \f$).
-
-/** Binary cross entropy operator. */
-template <typename TensorDataType>
-struct binary_cross_entropy_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                                   const TensorDataType& x2) const {
-    static const auto zero = El::TypeTraits<TensorDataType>::Zero();
-    static const auto one = El::TypeTraits<TensorDataType>::One();
-    TensorDataType y = zero;
-    if (x2 > zero) { y += -x2 * std::log(x1); }
-    if (x2 < one)  { y += -(one-x2) * std::log(one-x1); }
-    return y;
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    static const auto zero = El::TypeTraits<TensorDataType>::Zero();
-    static const auto one = El::TypeTraits<TensorDataType>::One();
-    dx2 = dx1 = zero;
-    if (dy == zero) { return; }
-    if (x2 > zero) {
-      dx1 += -x2 / x1 * dy;
-      dx2 += -std::log(x1) * dy;
-    }
-    if (x2 < one)  {
-      dx1 += (one-x2) / (one-x1) * dy;
-      dx2 += std::log(one-x1) * dy;
-    }
-  }
-};
-
-/** Sigmoid binary cross entropy operator.
- *  Equivalent to applying a sigmoid function to the first operand and
- *  then computing the binary cross entropy. Numerically stable
- *  implementation is taken from
- *  https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits.
- */
-template <typename TensorDataType>
-struct sigmoid_binary_cross_entropy_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                                   const TensorDataType& x2) const {
-    using std::exp;
-    using std::log1p;
-    static const auto zero = El::TypeTraits<TensorDataType>::Zero();
-    static const auto one = El::TypeTraits<TensorDataType>::One();
-    const auto& z = std::max(zero, std::min(x2, one));
-    if (x1 > zero) {
-      return (one - z) * x1 + log1p(exp(-x1));
-    } else {
-      return - x1 * z + log1p(exp(x1));
-    }
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    using std::exp;
-    using std::log1p;
-    static const auto zero = El::TypeTraits<TensorDataType>::Zero();
-    static const auto one = El::TypeTraits<TensorDataType>::One();
-    const auto& z = std::max(zero, std::min(x2, one));
-    if (x1 > zero) {
-      dx1 = -z + one / (one + exp(-x1));
-    } else {
-        dx1 = one - z - one / (one + exp(x1));
-    }
-    dx1 *= dy;
-    dx2 = (x2 == z) ? -x1 * dy : zero;
-  }
-};
-
-/** Boolean accuracy operator. */
-template <typename TensorDataType>
-struct boolean_accuracy_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                                   const TensorDataType& x2) const {
-    const auto& b1 = x1 >= TensorDataType(0.5);
-    const auto& b2 = x2 >= TensorDataType(0.5);
-    return b1 == b2
-        ? El::TypeTraits<TensorDataType>::One()
-        : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx2 = dx1 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Boolean false negative operator. */
-template <typename TensorDataType>
-struct boolean_false_negative_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                                   const TensorDataType& x2) const {
-    const auto& b1 = x1 >= TensorDataType(0.5);
-    const auto& b2 = x2 >= TensorDataType(0.5);
-    return (!b1 && b2) ? El::TypeTraits<TensorDataType>::One() : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx2 = dx1 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Boolean false positive operator. */
-template <typename TensorDataType>
-struct boolean_false_positive_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                                   const TensorDataType& x2) const {
-    const auto& b1 = x1 >= TensorDataType(0.5);
-    const auto& b2 = x2 >= TensorDataType(0.5);
-    return (b1 && !b2)
-        ? El::TypeTraits<TensorDataType>::One()
-        : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx2 = dx1 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-} // namespace
-
-// Template instantiation
-#define DEFINE_COMPUTE_OPS(layer, op)                                   \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::fp_compute() {            \
-    apply_entrywise_binary_operator<op>(                                \
-      this->get_prev_activations(0),                                    \
-      this->get_prev_activations(1),                                    \
-      this->get_activations());                                         \
-  }                                                                     \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::bp_compute() {            \
-    apply_binary_backprop_operator<op>(                                 \
-      this->get_local_prev_activations(0),                              \
-      this->get_local_prev_activations(1),                              \
-      this->get_local_prev_error_signals(),                             \
-      this->get_local_error_signals(0),                                 \
-      this->get_local_error_signals(1));                                \
-  }
-
-DEFINE_COMPUTE_OPS(binary_cross_entropy_layer, binary_cross_entropy_op)
-DEFINE_COMPUTE_OPS(sigmoid_binary_cross_entropy_layer, sigmoid_binary_cross_entropy_op)
-DEFINE_COMPUTE_OPS(boolean_accuracy_layer, boolean_accuracy_op)
-DEFINE_COMPUTE_OPS(boolean_false_negative_layer, boolean_false_negative_op)
-DEFINE_COMPUTE_OPS(boolean_false_positive_layer, boolean_false_positive_op)
-
-#define PROTO(T) \
-  BINARY_ETI_INST_MACRO_DEV_DT(binary_cross_entropy_layer, T, El::Device::CPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(sigmoid_binary_cross_entropy_layer, T, El::Device::CPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(boolean_accuracy_layer, T, El::Device::CPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(boolean_false_negative_layer, T, El::Device::CPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(boolean_false_positive_layer, T, El::Device::CPU)
-
-#define LBANN_INSTANTIATE_CPU_HALF
-#include "lbann/macros/instantiate.hpp"
-
-} // namespace lbann
diff --git a/src/layers/math/CMakeLists.txt b/src/layers/math/CMakeLists.txt
index eefd171ffe5..b3e93454c50 100644
--- a/src/layers/math/CMakeLists.txt
+++ b/src/layers/math/CMakeLists.txt
@@ -1,19 +1,9 @@
 # Add the source files for this directory
 set_full_path(THIS_DIR_SOURCES
-  binary.cpp
   math_builders.cpp
   matmul.cpp
-  unary.cpp
   )
 
-if (LBANN_HAS_GPU)
-  # Add the CUDA source files for this directory
-  set_full_path(THIS_DIR_CU_SOURCES
-    binary.cu
-    unary.cu
-    )
-endif ()
-
 add_subdirectory(cereal_registration)
 
 # Propagate the files up the tree
diff --git a/src/layers/math/binary.cpp b/src/layers/math/binary.cpp
deleted file mode 100644
index 79e8c475371..00000000000
--- a/src/layers/math/binary.cpp
+++ /dev/null
@@ -1,512 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#define LBANN_BINARY_LAYER_INSTANTIATE
-#include "lbann/layers/math/binary.hpp"
-#include "lbann/utils/entrywise_operator.hpp"
-
-namespace lbann {
-
-namespace {
-
-/** Apply a binary backprop operator to CPU data.
- *  The input and output data must be on CPU and must have the same
- *  dimensions. Given a binary function \f$ y = f(x_1,x_2) \f$, the
- *  corresponding BinaryBackPropOperator is a 5-ary function with the
- *  arguments \f$ x_1 \f$, \f$ x_2 \f$, \f$ dL/dy \f$, \f$ dL/dx_1\f$,
- *  \f$ dL/dx_2 \f$. The last two arguments should be overwritten when
- *  the BinaryBackPropOperator is called.
- */
-template <template <typename> class Op, typename TensorDataType>
-void apply_binary_backprop_operator(
-  const El::AbstractMatrix<TensorDataType>& x1,
-  const El::AbstractMatrix<TensorDataType>& x2,
-  const El::AbstractMatrix<TensorDataType>& dy,
-  El::AbstractMatrix<TensorDataType>& dx1,
-  El::AbstractMatrix<TensorDataType>& dx2) {
-  using BinaryBackPropOperator = Op<TensorDataType>;
-  if (x1.Contiguous() && x2.Contiguous() && dy.Contiguous()
-      && dx1.Contiguous() && dx2.Contiguous()) {
-    const auto* x1_buffer = x1.LockedBuffer();
-    const auto* x2_buffer = x2.LockedBuffer();
-    const auto* dy_buffer = dy.LockedBuffer();
-    auto* dx1_buffer = dx1.Buffer();
-    auto* dx2_buffer = dx2.Buffer();
-    const size_t size = x1.Height() * x1.Width();
-    LBANN_OMP_PARALLEL_FOR
-    for (size_t i = 0; i < size; ++i) {
-      BinaryBackPropOperator op;
-      op(x1_buffer[i], x2_buffer[i], dy_buffer[i],
-         dx1_buffer[i], dx2_buffer[i]);
-    }
-  } else {
-    auto const width = x1.Width();
-    auto const height = x1.Height();
-    LBANN_OMP_PARALLEL_FOR_COLLAPSE2
-    for (El::Int col = 0; col < width; ++col) {
-      for (El::Int row = 0; row < height; ++row) {
-        BinaryBackPropOperator op;
-        op(x1(row, col), x2(row, col), dy(row, col),
-           dx1(row, col), dx2(row, col));
-      }
-    }
-  }
-
-}
-
-// =========================================================
-// Operator objects for entry-wise binary layers
-// =========================================================
-// Note: Binary operator corresponds to forward prop step
-// (\f$ y = f(x_1,x_2) \f$) and 5-ary operator corresponds
-// to back prop step
-// (\f$ \frac{dL}{dx_i} = \frac{dL}{dy} \frac{df}{dx_i}(x_1,x_2) \f$).
-
-/** Add operator. */
-template <typename TensorDataType>
-struct add_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return x1 + x2;
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = dy;
-    dx2 = dy;
-  }
-};
-
-/** Subtract operator. */
-template <typename TensorDataType>
-struct subtract_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return x1 - x2;
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = dy;
-    dx2 = -dy;
-  }
-};
-
-/** Multiply operator. */
-template <typename TensorDataType>
-struct multiply_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return x1 * x2;
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = dy * x2;
-    dx2 = dy * x1;
-  }
-};
-
-/** Divide operator. */
-template <typename TensorDataType>
-struct divide_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return x1 / x2;
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = dy / x2;
-    dx2 = -dy * x1 / (x2*x2);
-  }
-};
-
-/** Modulo operator. */
-template <typename TensorDataType>
-struct mod_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    using std::fmod;
-    return fmod(x1, x2);
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = dy;
-    dx2 = -dy * std::floor(x1 / x2);
-  }
-};
-
-/** Power operator. */
-template <typename TensorDataType>
-struct pow_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return El::Pow(x1, x2);
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-
-    dx1 = dy * x2 * std::pow(x1, x2 - El::TypeTraits<TensorDataType>::One());
-    dx2 = dy * std::log(x1) * std::pow(x1, x2);
-  }
-};
-
-/** Safe divide operator.
- *  If a standard division produces an infinity or NaN, zero is output
- *  instead.
- */
-template <typename TensorDataType>
-struct safe_divide_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    const auto& y = x1 / x2;
-    if (std::isfinite(y)) { return y; }
-    else                  { return El::TypeTraits<TensorDataType>::Zero(); }
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    const auto& y = x1 / x2;
-    if (std::isfinite(y)) {
-      dx1 = dy / x2;
-      dx2 = -dy * x1 / (x2*x2);
-    } else {
-      dx1 = El::TypeTraits<TensorDataType>::Zero();
-      dx2 = El::TypeTraits<TensorDataType>::Zero();
-    }
-  }
-};
-
-/** Squared difference operator. */
-template <typename TensorDataType>
-struct squared_difference_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    const auto& diff = x1 - x2;
-    return diff * diff;
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = dy * 2*(x1-x2);
-    dx2 = dy * 2*(x2-x1);
-  }
-};
-
-/** Maximum operator. */
-template <typename TensorDataType>
-struct max_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return std::max(x1, x2);
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    if (x1 > x2) {
-      dx1 = dy;
-      dx2 = El::TypeTraits<TensorDataType>::Zero();
-    } else if (x2 > x1) {
-      dx1 = El::TypeTraits<TensorDataType>::Zero();
-      dx2 = dy;
-    } else {
-      dx1 = dy / 2;
-      dx2 = dy / 2;
-    }
-  }
-};
-
-/** Minimum operator. */
-template <typename TensorDataType>
-struct min_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return std::min(x1, x2);
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    if (x1 < x2) {
-      dx1 = dy;
-      dx2 = El::TypeTraits<TensorDataType>::Zero();
-    } else if (x2 < x1) {
-      dx1 = El::TypeTraits<TensorDataType>::Zero();
-      dx2 = dy;
-    } else {
-      dx1 = dy / 2;
-      dx2 = dy / 2;
-    }
-  }
-};
-
-/** Equal operator. */
-template <typename TensorDataType>
-struct equal_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return x1 == x2 ? El::TypeTraits<TensorDataType>::One() : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = El::TypeTraits<TensorDataType>::Zero();
-    dx2 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Not equal operator. */
-template <typename TensorDataType>
-struct not_equal_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return x1 == x2 ? El::TypeTraits<TensorDataType>::Zero() : El::TypeTraits<TensorDataType>::One();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = El::TypeTraits<TensorDataType>::Zero();
-    dx2 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Less than operator. */
-template <typename TensorDataType>
-struct less_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return x1 < x2 ? El::TypeTraits<TensorDataType>::One() : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = El::TypeTraits<TensorDataType>::Zero();
-    dx2 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Less than or equal operator. */
-template <typename TensorDataType>
-struct less_equal_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return x1 <= x2 ? El::TypeTraits<TensorDataType>::One() : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = El::TypeTraits<TensorDataType>::Zero();
-    dx2 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Greater than operator. */
-template <typename TensorDataType>
-struct greater_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return x1 > x2 ? El::TypeTraits<TensorDataType>::One() : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = El::TypeTraits<TensorDataType>::Zero();
-    dx2 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Greater than or equal operator. */
-template <typename TensorDataType>
-struct greater_equal_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    return x1 >= x2 ? El::TypeTraits<TensorDataType>::One() : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = El::TypeTraits<TensorDataType>::Zero();
-    dx2 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Logical and operator. */
-template <typename TensorDataType>
-struct logical_and_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    const auto& b1 = x1 != El::TypeTraits<TensorDataType>::Zero() && !std::isnan(x1);
-    const auto& b2 = x2 != El::TypeTraits<TensorDataType>::Zero() && !std::isnan(x2);
-    return (b1 && b2) ? El::TypeTraits<TensorDataType>::One() : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = El::TypeTraits<TensorDataType>::Zero();
-    dx2 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Logical or operator. */
-template <typename TensorDataType>
-struct logical_or_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    const auto& b1 = x1 != El::TypeTraits<TensorDataType>::Zero() && !std::isnan(x1);
-    const auto& b2 = x2 != El::TypeTraits<TensorDataType>::Zero() && !std::isnan(x2);
-    return (b1 || b2) ? El::TypeTraits<TensorDataType>::One() : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = El::TypeTraits<TensorDataType>::Zero();
-    dx2 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Logical xor operator. */
-template <typename TensorDataType>
-struct logical_xor_op {
-  inline TensorDataType operator()(const TensorDataType& x1,
-                             const TensorDataType& x2) const {
-    const auto& b1 = x1 != El::TypeTraits<TensorDataType>::Zero() && !std::isnan(x1);
-    const auto& b2 = x2 != El::TypeTraits<TensorDataType>::Zero() && !std::isnan(x2);
-    return (b1 || b2) && !(b1 && b2) ? El::TypeTraits<TensorDataType>::One() : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline void operator()(const TensorDataType& x1,
-                         const TensorDataType& x2,
-                         const TensorDataType& dy,
-                         TensorDataType& dx1,
-                         TensorDataType& dx2) const {
-    dx1 = El::TypeTraits<TensorDataType>::Zero();
-    dx2 = El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-} // namespace
-
-// Template instantiation
-#define DEFINE_COMPUTE_OPS(layer, op)                                   \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::fp_compute() {            \
-    apply_entrywise_binary_operator<op>(                                \
-      this->get_prev_activations(0),                                    \
-      this->get_prev_activations(1),                                    \
-      this->get_activations());                                         \
-  }                                                                     \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::bp_compute() {            \
-    apply_binary_backprop_operator<op>(                                 \
-      this->get_local_prev_activations(0),                              \
-      this->get_local_prev_activations(1),                              \
-      this->get_local_prev_error_signals(),                             \
-      this->get_local_error_signals(0),                                 \
-      this->get_local_error_signals(1));                                \
-  }
-
-DEFINE_COMPUTE_OPS(add_layer, add_op)
-DEFINE_COMPUTE_OPS(subtract_layer, subtract_op)
-DEFINE_COMPUTE_OPS(multiply_layer, multiply_op)
-DEFINE_COMPUTE_OPS(divide_layer, divide_op)
-DEFINE_COMPUTE_OPS(mod_layer, mod_op)
-DEFINE_COMPUTE_OPS(pow_layer, pow_op)
-DEFINE_COMPUTE_OPS(safe_divide_layer, safe_divide_op)
-DEFINE_COMPUTE_OPS(squared_difference_layer, squared_difference_op)
-DEFINE_COMPUTE_OPS(max_layer, max_op)
-DEFINE_COMPUTE_OPS(min_layer, min_op)
-DEFINE_COMPUTE_OPS(equal_layer, equal_op)
-DEFINE_COMPUTE_OPS(not_equal_layer, not_equal_op)
-DEFINE_COMPUTE_OPS(less_layer, less_op)
-DEFINE_COMPUTE_OPS(less_equal_layer, less_equal_op)
-DEFINE_COMPUTE_OPS(greater_layer, greater_op)
-DEFINE_COMPUTE_OPS(greater_equal_layer, greater_equal_op)
-DEFINE_COMPUTE_OPS(logical_and_layer, logical_and_op)
-DEFINE_COMPUTE_OPS(logical_or_layer, logical_or_op)
-DEFINE_COMPUTE_OPS(logical_xor_layer, logical_xor_op)
-
-#define PROTO(T)                                                       \
-  BINARY_ETI_INST_MACRO_DEV_DT(add_layer, T, El::Device::CPU);         \
-  BINARY_ETI_INST_MACRO_DEV_DT(subtract_layer, T, El::Device::CPU);    \
-  BINARY_ETI_INST_MACRO_DEV_DT(multiply_layer, T, El::Device::CPU);    \
-  BINARY_ETI_INST_MACRO_DEV_DT(divide_layer, T, El::Device::CPU);      \
-  BINARY_ETI_INST_MACRO_DEV_DT(mod_layer, T, El::Device::CPU);         \
-  BINARY_ETI_INST_MACRO_DEV_DT(pow_layer, T, El::Device::CPU);         \
-  BINARY_ETI_INST_MACRO_DEV_DT(safe_divide_layer, T, El::Device::CPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(squared_difference_layer, T, El::Device::CPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(max_layer, T, El::Device::CPU);         \
-  BINARY_ETI_INST_MACRO_DEV_DT(min_layer, T, El::Device::CPU);         \
-  BINARY_ETI_INST_MACRO_DEV_DT(equal_layer, T, El::Device::CPU);       \
-  BINARY_ETI_INST_MACRO_DEV_DT(not_equal_layer, T, El::Device::CPU);   \
-  BINARY_ETI_INST_MACRO_DEV_DT(less_layer, T, El::Device::CPU);        \
-  BINARY_ETI_INST_MACRO_DEV_DT(less_equal_layer, T, El::Device::CPU);  \
-  BINARY_ETI_INST_MACRO_DEV_DT(greater_layer, T, El::Device::CPU);     \
-  BINARY_ETI_INST_MACRO_DEV_DT(greater_equal_layer, T, El::Device::CPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(logical_and_layer, T, El::Device::CPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(logical_or_layer, T, El::Device::CPU);  \
-  BINARY_ETI_INST_MACRO_DEV_DT(logical_xor_layer, T, El::Device::CPU)
-
-#define LBANN_INSTANTIATE_CPU_HALF
-#include "lbann/macros/instantiate.hpp"
-
-} // namespace lbann
diff --git a/src/layers/math/binary.cu b/src/layers/math/binary.cu
deleted file mode 100644
index 08c2e21742d..00000000000
--- a/src/layers/math/binary.cu
+++ /dev/null
@@ -1,545 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#define LBANN_BINARY_LAYER_INSTANTIATE
-#include "lbann/layers/math/binary.hpp"
-#include "lbann/utils/gpu/helpers.hpp"
-
-namespace lbann {
-
-namespace {
-
-/** CUDA kernel to apply an binary backprop operator. */
-template <template <typename> class BinaryBackPropOperator,
-          typename TensorDataType>
-__global__
-void binary_backprop_operator_kernel(El::Int height, El::Int width,
-                                     const TensorDataType* __restrict__ x1,
-                                     El::Int x1_ldim,
-                                     const TensorDataType* __restrict__ x2,
-                                     El::Int x2_ldim,
-                                     const TensorDataType* __restrict__ dy,
-                                     El::Int dy_ldim,
-                                     TensorDataType* __restrict__ dx1,
-                                     El::Int dx1_ldim,
-                                     TensorDataType* __restrict__ dx2,
-                                     El::Int dx2_ldim) {
-  const El::Int gid = threadIdx.x + blockIdx.x * blockDim.x;
-  const El::Int size = height * width;
-  const El::Int num_threads = blockDim.x * gridDim.x;
-  BinaryBackPropOperator<TensorDataType> op;
-  for (El::Int pos = gid; pos < size; pos += num_threads) {
-    const auto& row = pos % height;
-    const auto& col = pos / height;
-    op(x1[row + col * x1_ldim],
-       x2[row + col * x2_ldim],
-       dy[row + col * dy_ldim],
-       dx1[row + col * dx1_ldim],
-       dx2[row + col * dx2_ldim]);
-  }
-}
-
-
-/** Apply a binary backprop operator to CPU data.
- *  The input and output data must be on CPU and must have the same
- *  dimensions. Given a binary function \f$ y = f(x_1,x_2) \f$, the
- *  corresponding BinaryBackPropOperator is a 5-ary function with the
- *  arguments \f$ x_1 \f$, \f$ x_2 \f$, \f$ dL/dy \f$, \f$ dL/dx_1\f$,
- *  \f$ dL/dx_2 \f$. The last two arguments should be overwritten when
- *  the BinaryBackPropOperator is called.
- */
-template <template <typename> class BinaryBackPropOperator,
-          typename TensorDataType>
-void apply_binary_backprop_operator(const El::AbstractMatrix<TensorDataType>& x1,
-                                    const El::AbstractMatrix<TensorDataType>& x2,
-                                    const El::AbstractMatrix<TensorDataType>& dy,
-                                    El::AbstractMatrix<TensorDataType>& dx1,
-                                    El::AbstractMatrix<TensorDataType>& dx2) {
-
-  // Get CUDA grid dimensions
-  // Note: Maximum CUDA grid dimension is 2^32-1
-  // (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications).
-  const El::Int height = x1.Height();
-  const El::Int width = x1.Width();
-  const El::Int block_dim = 256;
-  El::Int grid_dim = (height * width + block_dim - 1) / block_dim;
-  if (sizeof(El::Int) > sizeof(unsigned int)
-      && grid_dim > std::numeric_limits<uint32_t>::max()) {
-    grid_dim = std::numeric_limits<uint32_t>::max();
-  }
-
-  // Launch CUDA kernel
-  if (grid_dim > 0) {
-    auto multisync = El::MakeMultiSync(gpu::get_sync_info(dx2),
-                                       gpu::get_sync_info(dx1),
-                                       gpu::get_sync_info(dy),
-                                       gpu::get_sync_info(x2),
-                                       gpu::get_sync_info(x1));
-    hydrogen::gpu::LaunchKernel(
-      binary_backprop_operator_kernel<BinaryBackPropOperator, TensorDataType>,
-      grid_dim, block_dim, 0, multisync,
-      height, width,
-      x1.LockedBuffer(), x1.LDim(),
-      x2.LockedBuffer(), x2.LDim(),
-      dy.LockedBuffer(), dy.LDim(),
-      dx1.Buffer(), dx1.LDim(),
-      dx2.Buffer(), dx2.LDim());
-  }
-
-}
-
-// =========================================================
-// Operator objects for entry-wise binary layers
-// =========================================================
-// Note: Binary operator corresponds to forward prop step
-// (\f$ y = f(x_1,x_2) \f$) and 5-ary operator corresponds
-// to back prop step
-// (\f$ \frac{dL}{dx_i} = \frac{dL}{dy} \frac{df}{dx_i}(x_1,x_2) \f$).
-
-/** Add operator. */
-template <typename TensorDataType>
-struct add_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return x1 + x2;
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = dy;
-    dx2 = dy;
-  }
-};
-
-/** Subtract operator. */
-template <typename TensorDataType>
-struct subtract_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return x1 - x2;
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = dy;
-    dx2 = -dy;
-  }
-};
-
-/** Multiply operator. */
-template <typename TensorDataType>
-struct multiply_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return x1 * x2;
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = dy * x2;
-    dx2 = dy * x1;
-  }
-};
-
-/** Divide operator. */
-template <typename TensorDataType>
-struct divide_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return x1 / x2;
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = dy / x2;
-    dx2 = -dy * x1 / (x2*x2);
-  }
-};
-
-/** Modulo operator. */
-template <typename TensorDataType>
-struct mod_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return gpu_lib::mod(x1, x2);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = dy;
-    dx2 = -dy * gpu_lib::floor(x1 / x2);
-  }
-};
-
-/** Power operator. */
-template <typename TensorDataType>
-struct pow_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return gpu_lib::pow(x1, x2);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-
-    dx1 = dy * x2 * gpu_lib::pow(x1, x2 - TensorDataType(1.0));
-    dx2 = dy * gpu_lib::log(x1) * gpu_lib::pow(x1, x2);
-  }
-};
-
-/** Safe divide operator.
- *  If a standard division produces an infinity or NaN, zero is output
- *  instead.
- */
-template <typename TensorDataType>
-struct safe_divide_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    const auto& y = x1 / x2;
-    if (gpu_lib::isfinite(y)) { return y; }
-    else             { return TensorDataType(0.0); }
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    const auto& y = x1 / x2;
-    if (gpu_lib::isfinite(y)) {
-      dx1 = dy / x2;
-      dx2 = -dy * x1 / (x2*x2);
-    } else {
-      dx1 = TensorDataType(0.0);
-      dx2 = TensorDataType(0.0);
-    }
-  }
-};
-
-/** Squared difference operator. */
-template <typename TensorDataType>
-struct squared_difference_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    const auto& diff = x1 - x2;
-    return diff * diff;
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = dy * TensorDataType(2.) * (x1-x2);
-    dx2 = dy * TensorDataType(2.) * (x2-x1);
-  }
-};
-
-/** Maximum operator. */
-template <typename TensorDataType>
-struct max_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return gpu_lib::max(x1, x2);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    if (x1 > x2) {
-      dx1 = dy;
-      dx2 = TensorDataType(0.0);
-    } else if (x2 > x1) {
-      dx1 = TensorDataType(0.0);
-      dx2 = dy;
-    } else {
-      dx1 = dy / TensorDataType(2.);
-      dx2 = dy / TensorDataType(2.);
-    }
-  }
-};
-
-/** Minimum operator. */
-template <typename TensorDataType>
-struct min_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return gpu_lib::min(x1, x2);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    if (x1 < x2) {
-      dx1 = dy;
-      dx2 = TensorDataType(0.0);
-    } else if (x2 < x1) {
-      dx1 = TensorDataType(0.0);
-      dx2 = dy;
-    } else {
-      dx1 = dy / TensorDataType(2.);
-      dx2 = dy / TensorDataType(2.);
-    }
-  }
-};
-
-/** Equal operator. */
-template <typename TensorDataType>
-struct equal_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return x1 == x2 ? TensorDataType(1.0) : TensorDataType(0.0);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = TensorDataType(0.0);
-    dx2 = TensorDataType(0.0);
-  }
-};
-
-/** Not equal operator. */
-template <typename TensorDataType>
-struct not_equal_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return x1 == x2 ? TensorDataType(0.0) : TensorDataType(1.0);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = TensorDataType(0.0);
-    dx2 = TensorDataType(0.0);
-  }
-};
-
-/** Less than operator. */
-template <typename TensorDataType>
-struct less_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return x1 < x2 ? TensorDataType(1.0) : TensorDataType(0.0);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = TensorDataType(0.0);
-    dx2 = TensorDataType(0.0);
-  }
-};
-
-/** Less than or equal operator. */
-template <typename TensorDataType>
-struct less_equal_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return x1 <= x2 ? TensorDataType(1.0) : TensorDataType(0.0);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = TensorDataType(0.0);
-    dx2 = TensorDataType(0.0);
-  }
-};
-
-/** Greater than operator. */
-template <typename TensorDataType>
-struct greater_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return x1 > x2 ? TensorDataType(1.0) : TensorDataType(0.0);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = TensorDataType(0.0);
-    dx2 = TensorDataType(0.0);
-  }
-};
-
-/** Greater than or equal operator. */
-template <typename TensorDataType>
-struct greater_equal_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    return x1 >= x2 ? TensorDataType(1.0) : TensorDataType(0.0);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = TensorDataType(0.0);
-    dx2 = TensorDataType(0.0);
-  }
-};
-
-/** Logical and operator. */
-template <typename TensorDataType>
-struct logical_and_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    const auto& b1 = x1 != TensorDataType(0.0) && !gpu_lib::isnan(x1);
-    const auto& b2 = x2 != TensorDataType(0.0) && !gpu_lib::isnan(x2);
-    return (b1 && b2) ? TensorDataType(1.0) : TensorDataType(0.0);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = TensorDataType(0.0);
-    dx2 = TensorDataType(0.0);
-  }
-};
-
-/** Logical or operator. */
-template <typename TensorDataType>
-struct logical_or_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    const auto& b1 = x1 != TensorDataType(0.0) && !gpu_lib::isnan(x1);
-    const auto& b2 = x2 != TensorDataType(0.0) && !gpu_lib::isnan(x2);
-    return (b1 || b2) ? TensorDataType(1.0) : TensorDataType(0.0);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = TensorDataType(0.0);
-    dx2 = TensorDataType(0.0);
-  }
-};
-
-/** Logical xor operator. */
-template <typename TensorDataType>
-struct logical_xor_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x1,
-                                        const TensorDataType& x2) const {
-    const auto& b1 = x1 != TensorDataType(0.0) && !gpu_lib::isnan(x1);
-    const auto& b2 = x2 != TensorDataType(0.0) && !gpu_lib::isnan(x2);
-    return (b1 || b2) && !(b1 && b2) ? TensorDataType(1.0) : TensorDataType(0.0);
-  }
-  inline __device__ void operator()(const TensorDataType& x1,
-                                    const TensorDataType& x2,
-                                    const TensorDataType& dy,
-                                    TensorDataType& dx1,
-                                    TensorDataType& dx2) const {
-    dx1 = TensorDataType(0.0);
-    dx2 = TensorDataType(0.0);
-  }
-};
-
-} // namespace
-
-// Template instantiation
-#define DEFINE_COMPUTE_OPS(layer, op)                                   \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::fp_compute() {            \
-    gpu_lib::apply_entrywise_binary_operator<op>(                          \
-      this->get_prev_activations(0),                                    \
-      this->get_prev_activations(1),                                    \
-      this->get_activations());                                         \
-  }                                                                     \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::bp_compute() {            \
-    apply_binary_backprop_operator<op>(                                 \
-      this->get_local_prev_activations(0),                              \
-      this->get_local_prev_activations(1),                              \
-      this->get_local_prev_error_signals(),                             \
-      this->get_local_error_signals(0),                                 \
-      this->get_local_error_signals(1));                                \
-  }
-
-DEFINE_COMPUTE_OPS(add_layer, add_op)
-DEFINE_COMPUTE_OPS(subtract_layer, subtract_op)
-DEFINE_COMPUTE_OPS(multiply_layer, multiply_op)
-DEFINE_COMPUTE_OPS(divide_layer, divide_op)
-DEFINE_COMPUTE_OPS(mod_layer, mod_op)
-DEFINE_COMPUTE_OPS(pow_layer, pow_op)
-DEFINE_COMPUTE_OPS(safe_divide_layer, safe_divide_op)
-DEFINE_COMPUTE_OPS(squared_difference_layer, squared_difference_op)
-DEFINE_COMPUTE_OPS(max_layer, max_op)
-DEFINE_COMPUTE_OPS(min_layer, min_op)
-DEFINE_COMPUTE_OPS(equal_layer, equal_op)
-DEFINE_COMPUTE_OPS(not_equal_layer, not_equal_op)
-DEFINE_COMPUTE_OPS(less_layer, less_op)
-DEFINE_COMPUTE_OPS(less_equal_layer, less_equal_op)
-DEFINE_COMPUTE_OPS(greater_layer, greater_op)
-DEFINE_COMPUTE_OPS(greater_equal_layer, greater_equal_op)
-DEFINE_COMPUTE_OPS(logical_and_layer, logical_and_op)
-DEFINE_COMPUTE_OPS(logical_or_layer, logical_or_op)
-DEFINE_COMPUTE_OPS(logical_xor_layer, logical_xor_op)
-
-#define PROTO(T)                                                       \
-  BINARY_ETI_INST_MACRO_DEV_DT(add_layer, T, El::Device::GPU);         \
-  BINARY_ETI_INST_MACRO_DEV_DT(subtract_layer, T, El::Device::GPU);    \
-  BINARY_ETI_INST_MACRO_DEV_DT(multiply_layer, T, El::Device::GPU);    \
-  BINARY_ETI_INST_MACRO_DEV_DT(divide_layer, T, El::Device::GPU);      \
-  BINARY_ETI_INST_MACRO_DEV_DT(mod_layer, T, El::Device::GPU);         \
-  BINARY_ETI_INST_MACRO_DEV_DT(pow_layer, T, El::Device::GPU);         \
-  BINARY_ETI_INST_MACRO_DEV_DT(safe_divide_layer, T, El::Device::GPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(squared_difference_layer, T, El::Device::GPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(max_layer, T, El::Device::GPU);         \
-  BINARY_ETI_INST_MACRO_DEV_DT(min_layer, T, El::Device::GPU);         \
-  BINARY_ETI_INST_MACRO_DEV_DT(equal_layer, T, El::Device::GPU);       \
-  BINARY_ETI_INST_MACRO_DEV_DT(not_equal_layer, T, El::Device::GPU);   \
-  BINARY_ETI_INST_MACRO_DEV_DT(less_layer, T, El::Device::GPU);        \
-  BINARY_ETI_INST_MACRO_DEV_DT(less_equal_layer, T, El::Device::GPU);  \
-  BINARY_ETI_INST_MACRO_DEV_DT(greater_layer, T, El::Device::GPU);     \
-  BINARY_ETI_INST_MACRO_DEV_DT(greater_equal_layer, T, El::Device::GPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(logical_and_layer, T, El::Device::GPU); \
-  BINARY_ETI_INST_MACRO_DEV_DT(logical_or_layer, T, El::Device::GPU);  \
-  BINARY_ETI_INST_MACRO_DEV_DT(logical_xor_layer, T, El::Device::GPU)
-
-#define LBANN_INSTANTIATE_GPU_HALF
-#include "lbann/macros/instantiate.hpp"
-
-} // namespace lbann
diff --git a/src/layers/math/cereal_registration/CMakeLists.txt b/src/layers/math/cereal_registration/CMakeLists.txt
index e6c85262598..dc563ddc706 100644
--- a/src/layers/math/cereal_registration/CMakeLists.txt
+++ b/src/layers/math/cereal_registration/CMakeLists.txt
@@ -1,55 +1,6 @@
 # Add the source files for this directory
 set_full_path(THIS_DIR_SOURCES
-  abs.cpp
-  acos.cpp
-  acosh.cpp
-  add.cpp
-  asin.cpp
-  asinh.cpp
-  atan.cpp
-  atanh.cpp
-  ceil.cpp
-  cos.cpp
-  cosh.cpp
-  divide.cpp
-  equal.cpp
-  erf.cpp
-  erfinv.cpp
-  exp.cpp
-  expm1.cpp
-  floor.cpp
-  greater.cpp
-  greater_equal.cpp
-  less.cpp
-  less_equal.cpp
-  log.cpp
-  log1p.cpp
-  logical_and.cpp
-  logical_not.cpp
-  logical_or.cpp
-  logical_xor.cpp
   matmul.cpp
-  max.cpp
-  min.cpp
-  mod.cpp
-  multiply.cpp
-  negative.cpp
-  not_equal.cpp
-  pow.cpp
-  reciprocal.cpp
-  round.cpp
-  rsqrt.cpp
-  safe_divide.cpp
-  safe_reciprocal.cpp
-  sign.cpp
-  sin.cpp
-  sinh.cpp
-  sqrt.cpp
-  square.cpp
-  squared_difference.cpp
-  subtract.cpp
-  tan.cpp
-  tanh.cpp
   )
 
 # Propagate the files up the tree
diff --git a/src/layers/math/cereal_registration/add.cpp b/src/layers/math/cereal_registration/add.cpp
deleted file mode 100644
index c6e1d01a0dc..00000000000
--- a/src/layers/math/cereal_registration/add.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-add_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME add_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/asin.cpp b/src/layers/math/cereal_registration/asin.cpp
deleted file mode 100644
index 0c6f1f58b64..00000000000
--- a/src/layers/math/cereal_registration/asin.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-asin_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME asin_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/asinh.cpp b/src/layers/math/cereal_registration/asinh.cpp
deleted file mode 100644
index 0e763e17ef2..00000000000
--- a/src/layers/math/cereal_registration/asinh.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-asinh_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME asinh_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/atan.cpp b/src/layers/math/cereal_registration/atan.cpp
deleted file mode 100644
index e97387dc6e7..00000000000
--- a/src/layers/math/cereal_registration/atan.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-atan_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME atan_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/atanh.cpp b/src/layers/math/cereal_registration/atanh.cpp
deleted file mode 100644
index 56377388972..00000000000
--- a/src/layers/math/cereal_registration/atanh.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-atanh_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME atanh_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/ceil.cpp b/src/layers/math/cereal_registration/ceil.cpp
deleted file mode 100644
index e4f5c2570f1..00000000000
--- a/src/layers/math/cereal_registration/ceil.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-ceil_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME ceil_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/cosh.cpp b/src/layers/math/cereal_registration/cosh.cpp
deleted file mode 100644
index 68e4d20ac6a..00000000000
--- a/src/layers/math/cereal_registration/cosh.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-cosh_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME cosh_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/divide.cpp b/src/layers/math/cereal_registration/divide.cpp
deleted file mode 100644
index a47b3670756..00000000000
--- a/src/layers/math/cereal_registration/divide.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-divide_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME divide_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/equal.cpp b/src/layers/math/cereal_registration/equal.cpp
deleted file mode 100644
index cf0384bbc7d..00000000000
--- a/src/layers/math/cereal_registration/equal.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-equal_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME equal_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/erfinv.cpp b/src/layers/math/cereal_registration/erfinv.cpp
deleted file mode 100644
index 26666a019a7..00000000000
--- a/src/layers/math/cereal_registration/erfinv.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-erfinv_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME erfinv_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/expm1.cpp b/src/layers/math/cereal_registration/expm1.cpp
deleted file mode 100644
index 688e3866cf5..00000000000
--- a/src/layers/math/cereal_registration/expm1.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-expm1_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME expm1_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/floor.cpp b/src/layers/math/cereal_registration/floor.cpp
deleted file mode 100644
index 449596412c0..00000000000
--- a/src/layers/math/cereal_registration/floor.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-floor_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME floor_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/greater.cpp b/src/layers/math/cereal_registration/greater.cpp
deleted file mode 100644
index 4809b80f676..00000000000
--- a/src/layers/math/cereal_registration/greater.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-greater_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME greater_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/greater_equal.cpp b/src/layers/math/cereal_registration/greater_equal.cpp
deleted file mode 100644
index fdc5230ceb5..00000000000
--- a/src/layers/math/cereal_registration/greater_equal.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-greater_equal_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME greater_equal_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/less.cpp b/src/layers/math/cereal_registration/less.cpp
deleted file mode 100644
index 389d29855c4..00000000000
--- a/src/layers/math/cereal_registration/less.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-less_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME less_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/less_equal.cpp b/src/layers/math/cereal_registration/less_equal.cpp
deleted file mode 100644
index eb8d0f625b7..00000000000
--- a/src/layers/math/cereal_registration/less_equal.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-less_equal_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME less_equal_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/log1p.cpp b/src/layers/math/cereal_registration/log1p.cpp
deleted file mode 100644
index 79b489785ed..00000000000
--- a/src/layers/math/cereal_registration/log1p.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-log1p_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME log1p_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/logical_and.cpp b/src/layers/math/cereal_registration/logical_and.cpp
deleted file mode 100644
index 1b2481cef06..00000000000
--- a/src/layers/math/cereal_registration/logical_and.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-logical_and_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME logical_and_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/logical_not.cpp b/src/layers/math/cereal_registration/logical_not.cpp
deleted file mode 100644
index 7deac232b33..00000000000
--- a/src/layers/math/cereal_registration/logical_not.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-logical_not_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME logical_not_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/logical_or.cpp b/src/layers/math/cereal_registration/logical_or.cpp
deleted file mode 100644
index c2ad2a16f8b..00000000000
--- a/src/layers/math/cereal_registration/logical_or.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-logical_or_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME logical_or_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/logical_xor.cpp b/src/layers/math/cereal_registration/logical_xor.cpp
deleted file mode 100644
index 204b5b3a520..00000000000
--- a/src/layers/math/cereal_registration/logical_xor.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-logical_xor_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME logical_xor_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/max.cpp b/src/layers/math/cereal_registration/max.cpp
deleted file mode 100644
index ea4cf82cbb4..00000000000
--- a/src/layers/math/cereal_registration/max.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-max_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME max_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/min.cpp b/src/layers/math/cereal_registration/min.cpp
deleted file mode 100644
index d8aeef08a80..00000000000
--- a/src/layers/math/cereal_registration/min.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-min_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME min_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/mod.cpp b/src/layers/math/cereal_registration/mod.cpp
deleted file mode 100644
index aa6807ac6e5..00000000000
--- a/src/layers/math/cereal_registration/mod.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-mod_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME mod_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/multiply.cpp b/src/layers/math/cereal_registration/multiply.cpp
deleted file mode 100644
index 010c3ebb6a7..00000000000
--- a/src/layers/math/cereal_registration/multiply.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-multiply_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME multiply_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/negative.cpp b/src/layers/math/cereal_registration/negative.cpp
deleted file mode 100644
index 83112afbcfa..00000000000
--- a/src/layers/math/cereal_registration/negative.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-negative_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME negative_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/not_equal.cpp b/src/layers/math/cereal_registration/not_equal.cpp
deleted file mode 100644
index 938829ab973..00000000000
--- a/src/layers/math/cereal_registration/not_equal.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-not_equal_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME not_equal_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/pow.cpp b/src/layers/math/cereal_registration/pow.cpp
deleted file mode 100644
index b8b8b152e4b..00000000000
--- a/src/layers/math/cereal_registration/pow.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-pow_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME pow_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/reciprocal.cpp b/src/layers/math/cereal_registration/reciprocal.cpp
deleted file mode 100644
index 1e774d52a39..00000000000
--- a/src/layers/math/cereal_registration/reciprocal.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-reciprocal_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME reciprocal_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/round.cpp b/src/layers/math/cereal_registration/round.cpp
deleted file mode 100644
index fdf4e2fc2c5..00000000000
--- a/src/layers/math/cereal_registration/round.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-round_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME round_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/rsqrt.cpp b/src/layers/math/cereal_registration/rsqrt.cpp
deleted file mode 100644
index 7c5f76ab940..00000000000
--- a/src/layers/math/cereal_registration/rsqrt.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-rsqrt_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME rsqrt_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/safe_divide.cpp b/src/layers/math/cereal_registration/safe_divide.cpp
deleted file mode 100644
index 6a1c207e275..00000000000
--- a/src/layers/math/cereal_registration/safe_divide.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-safe_divide_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME safe_divide_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/safe_reciprocal.cpp b/src/layers/math/cereal_registration/safe_reciprocal.cpp
deleted file mode 100644
index 32605e8237e..00000000000
--- a/src/layers/math/cereal_registration/safe_reciprocal.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-safe_reciprocal_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME safe_reciprocal_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/sign.cpp b/src/layers/math/cereal_registration/sign.cpp
deleted file mode 100644
index 7cc76d06704..00000000000
--- a/src/layers/math/cereal_registration/sign.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-sign_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME sign_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/sinh.cpp b/src/layers/math/cereal_registration/sinh.cpp
deleted file mode 100644
index 21eacd120c5..00000000000
--- a/src/layers/math/cereal_registration/sinh.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-sinh_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME sinh_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/sqrt.cpp b/src/layers/math/cereal_registration/sqrt.cpp
deleted file mode 100644
index 86842c2ede1..00000000000
--- a/src/layers/math/cereal_registration/sqrt.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-sqrt_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME sqrt_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/square.cpp b/src/layers/math/cereal_registration/square.cpp
deleted file mode 100644
index 54f23fec0f1..00000000000
--- a/src/layers/math/cereal_registration/square.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-square_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME square_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/squared_difference.cpp b/src/layers/math/cereal_registration/squared_difference.cpp
deleted file mode 100644
index df424545895..00000000000
--- a/src/layers/math/cereal_registration/squared_difference.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-squared_difference_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME squared_difference_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/subtract.cpp b/src/layers/math/cereal_registration/subtract.cpp
deleted file mode 100644
index ae52512bcf9..00000000000
--- a/src/layers/math/cereal_registration/subtract.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/binary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-subtract_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME subtract_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/tanh.cpp b/src/layers/math/cereal_registration/tanh.cpp
deleted file mode 100644
index 8d82868f894..00000000000
--- a/src/layers/math/cereal_registration/tanh.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
-
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-tanh_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
-
-#define LBANN_LAYER_NAME tanh_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
diff --git a/src/layers/math/math_builders.cpp b/src/layers/math/math_builders.cpp
index 6e6a7e427d4..ae6f9ae287f 100644
--- a/src/layers/math/math_builders.cpp
+++ b/src/layers/math/math_builders.cpp
@@ -24,10 +24,8 @@
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
 
-#include <lbann/layers/math/binary.hpp>
 #include <lbann/layers/math/math_builders.hpp>
 #include <lbann/layers/math/matmul.hpp>
-#include <lbann/layers/math/unary.hpp>
 
 #include <lbann/proto/proto_common.hpp>
 #include <layers.pb.h>
@@ -56,106 +54,7 @@ std::unique_ptr<Layer> build_matmul_layer_from_pbuf(
   }
 }
 
-LBANN_LAYER_DEFAULT_BUILDER(abs);
-LBANN_LAYER_DEFAULT_BUILDER(acos);
-LBANN_LAYER_DEFAULT_BUILDER(acosh);
-LBANN_LAYER_DEFAULT_BUILDER(add);
-LBANN_LAYER_DEFAULT_BUILDER(asin);
-LBANN_LAYER_DEFAULT_BUILDER(asinh);
-LBANN_LAYER_DEFAULT_BUILDER(atan);
-LBANN_LAYER_DEFAULT_BUILDER(atanh);
-LBANN_LAYER_DEFAULT_BUILDER(ceil);
-LBANN_LAYER_DEFAULT_BUILDER(cos);
-LBANN_LAYER_DEFAULT_BUILDER(cosh);
-LBANN_LAYER_DEFAULT_BUILDER(divide);
-LBANN_LAYER_DEFAULT_BUILDER(equal);
-LBANN_LAYER_DEFAULT_BUILDER(exp);
-LBANN_LAYER_DEFAULT_BUILDER(expm1);
-LBANN_LAYER_DEFAULT_BUILDER(floor);
-LBANN_LAYER_DEFAULT_BUILDER(greater);
-LBANN_LAYER_DEFAULT_BUILDER(greater_equal);
-LBANN_LAYER_DEFAULT_BUILDER(erf);
-LBANN_LAYER_DEFAULT_BUILDER(erfinv);
-LBANN_LAYER_DEFAULT_BUILDER(less);
-LBANN_LAYER_DEFAULT_BUILDER(less_equal);
-LBANN_LAYER_DEFAULT_BUILDER(log);
-LBANN_LAYER_DEFAULT_BUILDER(log1p);
-LBANN_LAYER_DEFAULT_BUILDER(logical_and);
-LBANN_LAYER_DEFAULT_BUILDER(logical_not);
-LBANN_LAYER_DEFAULT_BUILDER(logical_or);
-LBANN_LAYER_DEFAULT_BUILDER(logical_xor);
-LBANN_LAYER_DEFAULT_BUILDER(max);
-LBANN_LAYER_DEFAULT_BUILDER(min);
-LBANN_LAYER_DEFAULT_BUILDER(mod);
-LBANN_LAYER_DEFAULT_BUILDER(multiply);
-LBANN_LAYER_DEFAULT_BUILDER(negative);
-LBANN_LAYER_DEFAULT_BUILDER(not_equal);
-LBANN_LAYER_DEFAULT_BUILDER(pow);
-LBANN_LAYER_DEFAULT_BUILDER(reciprocal);
-LBANN_LAYER_DEFAULT_BUILDER(round);
-LBANN_LAYER_DEFAULT_BUILDER(rsqrt);
-LBANN_LAYER_DEFAULT_BUILDER(safe_divide);
-LBANN_LAYER_DEFAULT_BUILDER(safe_reciprocal);
-LBANN_LAYER_DEFAULT_BUILDER(sign);
-LBANN_LAYER_DEFAULT_BUILDER(sin);
-LBANN_LAYER_DEFAULT_BUILDER(sinh);
-LBANN_LAYER_DEFAULT_BUILDER(sqrt);
-LBANN_LAYER_DEFAULT_BUILDER(square);
-LBANN_LAYER_DEFAULT_BUILDER(squared_difference);
-LBANN_LAYER_DEFAULT_BUILDER(subtract);
-LBANN_LAYER_DEFAULT_BUILDER(tan);
-LBANN_LAYER_DEFAULT_BUILDER(tanh);
-
 #define PROTO_DEVICE(T,D)                               \
-  LBANN_LAYER_BUILDER_ETI(abs, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(acos, T, D);                  \
-  LBANN_LAYER_BUILDER_ETI(acosh, T, D);                 \
-  LBANN_LAYER_BUILDER_ETI(add, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(asin, T, D);                  \
-  LBANN_LAYER_BUILDER_ETI(asinh, T, D);                 \
-  LBANN_LAYER_BUILDER_ETI(atan, T, D);                  \
-  LBANN_LAYER_BUILDER_ETI(atanh, T, D);                 \
-  LBANN_LAYER_BUILDER_ETI(ceil, T, D);                  \
-  LBANN_LAYER_BUILDER_ETI(cos, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(cosh, T, D);                  \
-  LBANN_LAYER_BUILDER_ETI(divide, T, D);                \
-  LBANN_LAYER_BUILDER_ETI(equal, T, D);                 \
-  LBANN_LAYER_BUILDER_ETI(exp, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(expm1, T, D);                 \
-  LBANN_LAYER_BUILDER_ETI(floor, T, D);                 \
-  LBANN_LAYER_BUILDER_ETI(greater, T, D);               \
-  LBANN_LAYER_BUILDER_ETI(greater_equal, T, D);         \
-  LBANN_LAYER_BUILDER_ETI(erf, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(erfinv, T, D);                \
-  LBANN_LAYER_BUILDER_ETI(less, T, D);                  \
-  LBANN_LAYER_BUILDER_ETI(less_equal, T, D);            \
-  LBANN_LAYER_BUILDER_ETI(log, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(log1p, T, D);                 \
-  LBANN_LAYER_BUILDER_ETI(logical_and, T, D);           \
-  LBANN_LAYER_BUILDER_ETI(logical_not, T, D);           \
-  LBANN_LAYER_BUILDER_ETI(logical_or, T, D);            \
-  LBANN_LAYER_BUILDER_ETI(logical_xor, T, D);           \
-  LBANN_LAYER_BUILDER_ETI(matmul, T, D);                \
-  LBANN_LAYER_BUILDER_ETI(max, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(min, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(mod, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(multiply, T, D);              \
-  LBANN_LAYER_BUILDER_ETI(negative, T, D);              \
-  LBANN_LAYER_BUILDER_ETI(not_equal, T, D);             \
-  LBANN_LAYER_BUILDER_ETI(pow, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(reciprocal, T, D);            \
-  LBANN_LAYER_BUILDER_ETI(round, T, D);                 \
-  LBANN_LAYER_BUILDER_ETI(rsqrt, T, D);                 \
-  LBANN_LAYER_BUILDER_ETI(safe_divide, T, D);           \
-  LBANN_LAYER_BUILDER_ETI(safe_reciprocal, T, D);       \
-  LBANN_LAYER_BUILDER_ETI(sign, T, D);                  \
-  LBANN_LAYER_BUILDER_ETI(sin, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(sinh, T, D);                  \
-  LBANN_LAYER_BUILDER_ETI(sqrt, T, D);                  \
-  LBANN_LAYER_BUILDER_ETI(square, T, D);                \
-  LBANN_LAYER_BUILDER_ETI(squared_difference, T, D);    \
-  LBANN_LAYER_BUILDER_ETI(subtract, T, D);              \
-  LBANN_LAYER_BUILDER_ETI(tan, T, D);                   \
-  LBANN_LAYER_BUILDER_ETI(tanh, T, D)
+  LBANN_LAYER_BUILDER_ETI(matmul, T, D)
 #include <lbann/macros/instantiate_device.hpp>
 } // namespace lbann
diff --git a/src/layers/math/unary.cpp b/src/layers/math/unary.cpp
deleted file mode 100644
index 84a23b6711f..00000000000
--- a/src/layers/math/unary.cpp
+++ /dev/null
@@ -1,514 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#define LBANN_UNARY_LAYER_INSTANTIATE
-#include "lbann/layers/math/unary.hpp"
-#include "lbann/utils/entrywise_operator.hpp"
-
-namespace lbann {
-
-namespace {
-
-// =========================================================
-// Operator objects for entry-wise unary layers
-// =========================================================
-// Note: Unary operator corresponds to forward prop step
-// (\f$ y = f(x) \f$) and binary operator corresponds to
-// back prop step
-// (\f$ \frac{dL}{dx} = \frac{dL}{dy} f'(x) \f$).
-
-/** Logical not operator. */
-template <typename TensorDataType>
-struct logical_not_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    const auto& b = x != El::TypeTraits<TensorDataType>::Zero() && !std::isnan(x);
-    return !b ? El::TypeTraits<TensorDataType>::One() : El::TypeTraits<TensorDataType>::Zero();
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Absolute value operator. */
-template <typename TensorDataType>
-struct abs_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return x >= El::TypeTraits<TensorDataType>::Zero() ? x : -x;
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    if      (x > El::TypeTraits<TensorDataType>::Zero()) { return dy;   }
-    else if (x < El::TypeTraits<TensorDataType>::Zero()) { return -dy;  }
-    else               { return El::TypeTraits<TensorDataType>::Zero(); }
-  }
-};
-
-/** Negative operator. */
-template <typename TensorDataType>
-struct negative_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return -x;
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return -dy;
-  }
-};
-
-/** Sign operator. */
-template <typename TensorDataType>
-struct sign_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    if      (x > El::TypeTraits<TensorDataType>::Zero()) { return El::TypeTraits<TensorDataType>::One();  }
-    else if (x < El::TypeTraits<TensorDataType>::Zero()) { return -El::TypeTraits<TensorDataType>::One(); }
-    else               { return El::TypeTraits<TensorDataType>::Zero(); }
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Round operator. */
-template <typename TensorDataType>
-struct round_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    using std::round;
-    return round(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Ceiling operator. */
-template <typename TensorDataType>
-struct ceil_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    using std::ceil;
-    return ceil(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Floor operator. */
-template <typename TensorDataType>
-struct floor_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    using std::floor;
-    return floor(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return El::TypeTraits<TensorDataType>::Zero();
-  }
-};
-
-/** Reciprocal operator.
- *  If a standard reciprocal produces an infinity or NaN, El::TypeTraits<TensorDataType>::Zero() is
- *  output instead.
- */
-template <typename TensorDataType>
-struct reciprocal_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::To<TensorDataType>(1) / x;
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    if (dy == El::TypeTraits<TensorDataType>::Zero()) { return El::TypeTraits<TensorDataType>::Zero(); }
-    else            { return - dy / (x*x); }
-  }
-};
-
-/** Square operator. */
-template <typename TensorDataType>
-struct square_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return x*x;
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return El::To<TensorDataType>(2)*x * dy;
-  }
-};
-
-
-/** Square root operator. */
-template <typename TensorDataType>
-struct sqrt_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Sqrt(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (El::To<TensorDataType>(2) * El::Sqrt(x));
-  }
-};
-
-/** Reciprocal square root operator. */
-template <typename TensorDataType>
-struct rsqrt_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::To<TensorDataType>(1) / El::Sqrt(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const auto& s = El::Sqrt(x);
-    return - dy / (El::To<TensorDataType>(2) * x * s);
-  }
-};
-
-/** Safe reciprocal operator. */
-template <typename TensorDataType>
-struct safe_reciprocal_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    const auto& y = El::To<TensorDataType>(1) / x;
-    if (std::isfinite(y)) { return y; }
-    else                  { return El::TypeTraits<TensorDataType>::Zero(); }
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const auto& y = El::To<TensorDataType>(1) / x;
-    if (std::isfinite(y)) { return - dy * y*y; }
-    else                  { return El::TypeTraits<TensorDataType>::Zero(); }
-  }
-};
-
-/** Exponential operator. */
-template <typename TensorDataType>
-struct exp_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Exp(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy * El::Exp(x);
-  }
-};
-
-/** Exponential minus one operator. */
-template <typename TensorDataType>
-struct expm1_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    using std::expm1;
-    return expm1(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy * El::Exp(x);
-  }
-};
-
-/** Natural logarithm operator. */
-template <typename TensorDataType>
-struct log_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Log(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / x;
-  }
-};
-
-/** Natural logarithm one plus operator. */
-template <typename TensorDataType>
-struct log1p_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    using std::log1p;
-    return log1p(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (x + El::TypeTraits<TensorDataType>::One());
-  }
-};
-
-/** Cosine operator. */
-template <typename TensorDataType>
-struct cos_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Cos(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return -dy * El::Sin(x);
-  }
-};
-
-/** Sine operator. */
-template <typename TensorDataType>
-struct sin_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Sin(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy * El::Cos(x);
-  }
-};
-
-/** Tangent operator. */
-template <typename TensorDataType>
-struct tan_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Tan(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const auto& c = El::Cos(x);
-    return dy / (c*c);
-  }
-};
-
-/** Arccosine operator. */
-template <typename TensorDataType>
-struct acos_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Acos(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return -dy / El::Sqrt(El::TypeTraits<TensorDataType>::One() - x*x);
-  }
-};
-
-/** Arcsine operator. */
-template <typename TensorDataType>
-struct asin_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Asin(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / El::Sqrt(El::TypeTraits<TensorDataType>::One() - x*x);
-  }
-};
-
-/** Arctangent operator. */
-template <typename TensorDataType>
-struct atan_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Atan(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (El::TypeTraits<TensorDataType>::One() + x*x);
-  }
-};
-
-/** Hyperbolic cosine operator. */
-template <typename TensorDataType>
-struct cosh_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Cosh(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy * El::Sinh(x);
-  }
-};
-
-/** Hyperbolic sine operator. */
-template <typename TensorDataType>
-struct sinh_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Sinh(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy * El::Cosh(x);
-  }
-};
-
-/** Hyperbolic tangent operator. */
-template <typename TensorDataType>
-struct tanh_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Tanh(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const auto& c = El::Cosh(x);
-    return dy / (c*c);
-  }
-};
-
-/** Hyperbolic arccosine operator. */
-template <typename TensorDataType>
-struct acosh_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Acosh(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return -dy / (El::Sqrt(x - El::TypeTraits<TensorDataType>::One()) * El::Sqrt(x + El::TypeTraits<TensorDataType>::One()));
-  }
-};
-
-/** Hyperbolic arcsine operator. */
-template <typename TensorDataType>
-struct asinh_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Asinh(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / El::Sqrt(El::TypeTraits<TensorDataType>::One() + x*x);
-  }
-};
-
-/** Hyperbolic arctangent operator. */
-template <typename TensorDataType>
-struct atanh_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::Atanh(x);
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (El::TypeTraits<TensorDataType>::One() - x*x);
-  }
-};
-
-/** Error function operator. */
-template <typename TensorDataType>
-struct erf_op {
-  inline TensorDataType operator()(const TensorDataType& x) const {
-    return El::To<TensorDataType>(std::erf(El::To<double>(x)));
-  }
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const auto two_rsqrt_pi = El::To<TensorDataType>(1.12837916709551257389);
-    return dy * two_rsqrt_pi * El::Exp(-x*x);
-  }
-};
-
-/** Inverse error function operator. */
-template <typename TensorDataType>
-struct erfinv_op {
-
-  inline TensorDataType operator()(const TensorDataType& x) const {
-
-    // Trivial cases
-    const TensorDataType inf = std::numeric_limits<TensorDataType>::infinity();
-    if (x <= -El::TypeTraits<TensorDataType>::One()) {
-      return -inf;
-    }
-    if (x >= El::TypeTraits<TensorDataType>::One()) {
-      return inf;
-    }
-
-    // Apply Newton's method
-    const double x_ = El::To<double>(x);
-    double y = x_;
-    constexpr double half_sqrt_pi = 0.88622692545275801364;
-    constexpr double eps = std::numeric_limits<double>::epsilon();
-    constexpr int max_iters = 50;
-    for (int iter = 0; iter < max_iters; ++iter) {
-      const double err = std::erf(y) - x_;
-      if (std::isinf(y) || std::abs(err) < eps) {
-        break;
-      }
-      y -= err * half_sqrt_pi * std::exp(y*y);
-    }
-    return El::To<TensorDataType>(y);
-
-  }
-
-  inline TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    if (El::Abs(x) >= El::TypeTraits<TensorDataType>::One()) {
-      return El::TypeTraits<TensorDataType>::Zero();
-    }
-    else {
-      const auto half_sqrt_pi = El::To<TensorDataType>(0.88622692545275801364);
-      const auto y = (*this)(x);
-      return dy * half_sqrt_pi * El::Exp(y*y);
-    }
-  }
-
-};
-
-} // namespace
-
-// Template instantiation
-#define DEFINE_COMPUTE_OPS(layer, op)                                   \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::fp_compute() {            \
-      apply_entrywise_unary_operator<op>(                               \
-        this->get_prev_activations(),                                   \
-    this->get_activations());                                           \
-  }                                                                     \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::bp_compute() {            \
-    apply_entrywise_binary_operator<op>(                                \
-      this->get_prev_activations(),                                     \
-      this->get_prev_error_signals(),                                   \
-      this->get_error_signals());                                       \
-  }
-
-DEFINE_COMPUTE_OPS(logical_not_layer, logical_not_op)
-DEFINE_COMPUTE_OPS(abs_layer, abs_op)
-DEFINE_COMPUTE_OPS(negative_layer, negative_op)
-DEFINE_COMPUTE_OPS(sign_layer, sign_op)
-DEFINE_COMPUTE_OPS(round_layer, round_op)
-DEFINE_COMPUTE_OPS(ceil_layer, ceil_op)
-DEFINE_COMPUTE_OPS(floor_layer, floor_op)
-DEFINE_COMPUTE_OPS(reciprocal_layer, reciprocal_op)
-DEFINE_COMPUTE_OPS(square_layer, square_op)
-DEFINE_COMPUTE_OPS(sqrt_layer, sqrt_op)
-DEFINE_COMPUTE_OPS(rsqrt_layer, rsqrt_op)
-DEFINE_COMPUTE_OPS(safe_reciprocal_layer, safe_reciprocal_op)
-DEFINE_COMPUTE_OPS(exp_layer, exp_op)
-DEFINE_COMPUTE_OPS(expm1_layer, expm1_op)
-DEFINE_COMPUTE_OPS(log_layer, log_op)
-DEFINE_COMPUTE_OPS(log1p_layer, log1p_op)
-DEFINE_COMPUTE_OPS(cos_layer, cos_op)
-DEFINE_COMPUTE_OPS(sin_layer, sin_op)
-DEFINE_COMPUTE_OPS(tan_layer, tan_op)
-DEFINE_COMPUTE_OPS(acos_layer, acos_op)
-DEFINE_COMPUTE_OPS(asin_layer, asin_op)
-DEFINE_COMPUTE_OPS(atan_layer, atan_op)
-DEFINE_COMPUTE_OPS(cosh_layer, cosh_op)
-DEFINE_COMPUTE_OPS(sinh_layer, sinh_op)
-DEFINE_COMPUTE_OPS(tanh_layer, tanh_op)
-DEFINE_COMPUTE_OPS(acosh_layer, acosh_op)
-DEFINE_COMPUTE_OPS(asinh_layer, asinh_op)
-DEFINE_COMPUTE_OPS(atanh_layer, atanh_op)
-DEFINE_COMPUTE_OPS(erf_layer, erf_op)
-DEFINE_COMPUTE_OPS(erfinv_layer, erfinv_op)
-
-#define PROTO(T) \
-  UNARY_ETI_INST_MACRO_DEV_DT(logical_not_layer, T, El::Device::CPU); \
-  UNARY_ETI_INST_MACRO_DEV_DT(abs_layer, T, El::Device::CPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(negative_layer, T, El::Device::CPU);    \
-  UNARY_ETI_INST_MACRO_DEV_DT(sign_layer, T, El::Device::CPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(round_layer, T, El::Device::CPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(ceil_layer, T, El::Device::CPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(floor_layer, T, El::Device::CPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(reciprocal_layer, T, El::Device::CPU);  \
-  UNARY_ETI_INST_MACRO_DEV_DT(square_layer, T, El::Device::CPU);      \
-  UNARY_ETI_INST_MACRO_DEV_DT(sqrt_layer, T, El::Device::CPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(rsqrt_layer, T, El::Device::CPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(safe_reciprocal_layer, T, El::Device::CPU); \
-  UNARY_ETI_INST_MACRO_DEV_DT(exp_layer, T, El::Device::CPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(expm1_layer, T, El::Device::CPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(log_layer, T, El::Device::CPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(log1p_layer, T, El::Device::CPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(cos_layer, T, El::Device::CPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(sin_layer, T, El::Device::CPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(tan_layer, T, El::Device::CPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(acos_layer, T, El::Device::CPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(asin_layer, T, El::Device::CPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(atan_layer, T, El::Device::CPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(cosh_layer, T, El::Device::CPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(sinh_layer, T, El::Device::CPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(tanh_layer, T, El::Device::CPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(acosh_layer, T, El::Device::CPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(asinh_layer, T, El::Device::CPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(atanh_layer, T, El::Device::CPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(erf_layer, T, El::Device::CPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(erfinv_layer, T, El::Device::CPU)
-
-#define LBANN_INSTANTIATE_CPU_HALF
-#include "lbann/macros/instantiate.hpp"
-
-} // namespace lbann
diff --git a/src/layers/math/unary.cu b/src/layers/math/unary.cu
deleted file mode 100644
index 0ecddefec94..00000000000
--- a/src/layers/math/unary.cu
+++ /dev/null
@@ -1,481 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#define LBANN_UNARY_LAYER_INSTANTIATE
-#include "lbann/layers/math/unary.hpp"
-#include "lbann/utils/gpu/helpers.hpp"
-
-namespace lbann {
-
-namespace {
-
-// =========================================================
-// Operator objects for entry-wise unary layers
-// =========================================================
-// Note: Unary operator corresponds to forward prop step
-// (\f$ y = f(x) \f$) and binary operator corresponds to
-// back prop step
-// (\f$ \frac{dL}{dx} = \frac{dL}{dy} f'(x) \f$).
-
-/** Logical not operator. */
-template <typename TensorDataType>
-struct logical_not_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    const auto& b = x != TensorDataType(0.0) && !gpu_lib::isnan(x);
-    return !b ? TensorDataType(1.0) : TensorDataType(0.0);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return TensorDataType(0.0);
-  }
-};
-
-/** Absolute value operator. */
-template <typename TensorDataType>
-struct abs_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::abs(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const TensorDataType zero = 0.;
-    if      (x > zero) { return dy;   }
-    else if (x < zero) { return -dy;  }
-    else               { return zero; }
-  }
-};
-
-/** Negative operator. */
-template <typename TensorDataType>
-struct negative_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return -x;
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return -dy;
-  }
-};
-
-/** Sign operator. */
-template <typename TensorDataType>
-struct sign_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    const TensorDataType zero = 0.;
-    const TensorDataType one = 1.;
-    if      (x > zero) { return one;  }
-    else if (x < zero) { return -one; }
-    else               { return zero; }
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return TensorDataType(0.0);
-  }
-};
-
-/** Round operator. */
-template <typename TensorDataType>
-struct round_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::round(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return TensorDataType(0.0);
-  }
-};
-
-/** Ceiling operator. */
-template <typename TensorDataType>
-struct ceil_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::ceil(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return TensorDataType(0.0);
-  }
-};
-
-/** Floor operator. */
-template <typename TensorDataType>
-struct floor_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::floor(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return TensorDataType(0.0);
-  }
-};
-
-/** Reciprocal operator. */
-template <typename TensorDataType>
-struct reciprocal_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return TensorDataType(1.) / x;
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    if (dy == TensorDataType(0.0)) { return TensorDataType(0.0); }
-    else                   { return - dy / (x*x); }
-
-  }
-};
-
-/** Square operator. */
-template <typename TensorDataType>
-struct square_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return x*x;
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return TensorDataType(2.) * x * dy;
-  }
-};
-
-
-/** Square root operator. */
-template <typename TensorDataType>
-struct sqrt_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::sqrt(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (TensorDataType(2.) * gpu_lib::sqrt(x));
-  }
-};
-
-/** Reciprocal square root operator. */
-template <typename TensorDataType>
-struct rsqrt_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::rsqrt(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const auto& s = gpu_lib::sqrt(x);
-    return - dy / (TensorDataType(2.) * x * s);
-  }
-};
-
-/** Safe reciprocal operator.
- *  If a standard reciprocal produces an infinity or NaN, zero is
- *  output instead.
- */
-template <typename TensorDataType>
-struct safe_reciprocal_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    const auto& y = TensorDataType(1.) / x;
-    if (gpu_lib::isfinite(y)) { return y; }
-    else             { return TensorDataType(0.0); }
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const auto& y = TensorDataType(1.) / x;
-    if (gpu_lib::isfinite(y)) { return - dy * y*y; }
-    else             { return TensorDataType(0.0); }
-  }
-};
-
-/** Exponential operator. */
-template <typename TensorDataType>
-struct exp_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::exp(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy * gpu_lib::exp(x);
-  }
-};
-
-/** Exponential minus one operator. */
-template <typename TensorDataType>
-struct expm1_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::expm1(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy * gpu_lib::exp(x);
-  }
-};
-
-/** Natural logarithm operator. */
-template <typename TensorDataType>
-struct log_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::log(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / x;
-  }
-};
-
-/** Natural logarithm one plus operator. */
-template <typename TensorDataType>
-struct log1p_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::log1p(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (x + TensorDataType(1.0));
-  }
-};
-
-/** Cosine operator. */
-template <typename TensorDataType>
-struct cos_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::cos(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return -dy * gpu_lib::sin(x);
-  }
-};
-
-/** Sine operator. */
-template <typename TensorDataType>
-struct sin_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::sin(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy * gpu_lib::cos(x);
-  }
-};
-
-/** Tangent operator. */
-template <typename TensorDataType>
-struct tan_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::tan(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const auto& c = gpu_lib::cos(x);
-    return dy / (c*c);
-  }
-};
-
-/** Arccosine operator. */
-template <typename TensorDataType>
-struct acos_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::acos(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return -dy / gpu_lib::sqrt(TensorDataType(1.0) - x*x);
-  }
-};
-
-/** Arcsine operator. */
-template <typename TensorDataType>
-struct asin_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::asin(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / gpu_lib::sqrt(TensorDataType(1.0) - x*x);
-  }
-};
-
-/** Arctangent operator. */
-template <typename TensorDataType>
-struct atan_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::atan(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (TensorDataType(1.0) + x*x);
-  }
-};
-
-/** Hyperbolic cosine operator. */
-template <typename TensorDataType>
-struct cosh_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::cosh(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy * gpu_lib::sinh(x);
-  }
-};
-
-/** Hyperbolic sine operator. */
-template <typename TensorDataType>
-struct sinh_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::sinh(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy * gpu_lib::cosh(x);
-  }
-};
-
-/** Hyperbolic tangent operator. */
-template <typename TensorDataType>
-struct tanh_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::tanh(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const auto& c = gpu_lib::cosh(x);
-    return dy / (c*c);
-  }
-};
-
-/** Hyperbolic arccosine operator. */
-template <typename TensorDataType>
-struct acosh_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::acosh(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return -dy / (gpu_lib::sqrt(x - TensorDataType(1.0)) * gpu_lib::sqrt(x + TensorDataType(1.0)));
-  }
-};
-
-/** Hyperbolic arcsine operator. */
-template <typename TensorDataType>
-struct asinh_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::asinh(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / gpu_lib::sqrt(TensorDataType(1.0) + x*x);
-  }
-};
-
-/** Hyperbolic arctangent operator. */
-template <typename TensorDataType>
-struct atanh_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::atanh(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    return dy / (TensorDataType(1.0) - x*x);
-  }
-};
-
-/** Error function operator. */
-template <typename TensorDataType>
-struct erf_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::erf(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const TensorDataType two_rsqrt_pi(1.12837916709551257389);
-    return dy * two_rsqrt_pi * gpu_lib::exp(-x*x);
-  }
-};
-
-/** Inverse error function operator. */
-template <typename TensorDataType>
-struct erfinv_op {
-  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
-    return gpu_lib::erfinv(x);
-  }
-  inline __device__ TensorDataType operator()(const TensorDataType& x, const TensorDataType& dy) const {
-    const TensorDataType half_sqrt_pi(0.88622692545275801364);
-    const auto& y = gpu_lib::erfinv(x);
-    return dy * half_sqrt_pi * gpu_lib::exp(y*y);
-  }
-};
-
-} // namespace
-
-// Template instantiation
-#define DEFINE_COMPUTE_OPS(layer, op)                                   \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::fp_compute() {            \
-    gpu_lib::apply_entrywise_unary_operator<op>(                           \
-      this->get_prev_activations(),                                     \
-      this->get_activations());                                         \
-  }                                                                     \
-  template <typename TensorDataType, data_layout Layout, El::Device Device> \
-  void layer<TensorDataType, Layout, Device>::bp_compute() {            \
-    gpu_lib::apply_entrywise_binary_operator<op>(                          \
-      this->get_prev_activations(),                                     \
-      this->get_prev_error_signals(),                                   \
-      this->get_error_signals());                                       \
-  }
-
-DEFINE_COMPUTE_OPS(logical_not_layer, logical_not_op)
-DEFINE_COMPUTE_OPS(abs_layer, abs_op)
-DEFINE_COMPUTE_OPS(negative_layer, negative_op)
-DEFINE_COMPUTE_OPS(sign_layer, sign_op)
-DEFINE_COMPUTE_OPS(round_layer, round_op)
-DEFINE_COMPUTE_OPS(ceil_layer, ceil_op)
-DEFINE_COMPUTE_OPS(floor_layer, floor_op)
-DEFINE_COMPUTE_OPS(reciprocal_layer, reciprocal_op)
-DEFINE_COMPUTE_OPS(square_layer, square_op)
-DEFINE_COMPUTE_OPS(sqrt_layer, sqrt_op)
-DEFINE_COMPUTE_OPS(rsqrt_layer, rsqrt_op)
-DEFINE_COMPUTE_OPS(safe_reciprocal_layer, safe_reciprocal_op)
-DEFINE_COMPUTE_OPS(exp_layer, exp_op)
-DEFINE_COMPUTE_OPS(expm1_layer, expm1_op)
-DEFINE_COMPUTE_OPS(log_layer, log_op)
-DEFINE_COMPUTE_OPS(log1p_layer, log1p_op)
-DEFINE_COMPUTE_OPS(cos_layer, cos_op)
-DEFINE_COMPUTE_OPS(sin_layer, sin_op)
-DEFINE_COMPUTE_OPS(tan_layer, tan_op)
-DEFINE_COMPUTE_OPS(acos_layer, acos_op)
-DEFINE_COMPUTE_OPS(asin_layer, asin_op)
-DEFINE_COMPUTE_OPS(atan_layer, atan_op)
-DEFINE_COMPUTE_OPS(cosh_layer, cosh_op)
-DEFINE_COMPUTE_OPS(sinh_layer, sinh_op)
-DEFINE_COMPUTE_OPS(tanh_layer, tanh_op)
-DEFINE_COMPUTE_OPS(acosh_layer, acosh_op)
-DEFINE_COMPUTE_OPS(asinh_layer, asinh_op)
-DEFINE_COMPUTE_OPS(atanh_layer, atanh_op)
-DEFINE_COMPUTE_OPS(erf_layer, erf_op)
-DEFINE_COMPUTE_OPS(erfinv_layer, erfinv_op)
-
-#define PROTO(T) \
-  UNARY_ETI_INST_MACRO_DEV_DT(logical_not_layer, T, El::Device::GPU); \
-  UNARY_ETI_INST_MACRO_DEV_DT(abs_layer, T, El::Device::GPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(negative_layer, T, El::Device::GPU);    \
-  UNARY_ETI_INST_MACRO_DEV_DT(sign_layer, T, El::Device::GPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(round_layer, T, El::Device::GPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(ceil_layer, T, El::Device::GPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(floor_layer, T, El::Device::GPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(reciprocal_layer, T, El::Device::GPU);  \
-  UNARY_ETI_INST_MACRO_DEV_DT(square_layer, T, El::Device::GPU);      \
-  UNARY_ETI_INST_MACRO_DEV_DT(sqrt_layer, T, El::Device::GPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(rsqrt_layer, T, El::Device::GPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(safe_reciprocal_layer, T, El::Device::GPU); \
-  UNARY_ETI_INST_MACRO_DEV_DT(exp_layer, T, El::Device::GPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(expm1_layer, T, El::Device::GPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(log_layer, T, El::Device::GPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(log1p_layer, T, El::Device::GPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(cos_layer, T, El::Device::GPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(sin_layer, T, El::Device::GPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(tan_layer, T, El::Device::GPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(acos_layer, T, El::Device::GPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(asin_layer, T, El::Device::GPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(atan_layer, T, El::Device::GPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(cosh_layer, T, El::Device::GPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(sinh_layer, T, El::Device::GPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(tanh_layer, T, El::Device::GPU);        \
-  UNARY_ETI_INST_MACRO_DEV_DT(acosh_layer, T, El::Device::GPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(asinh_layer, T, El::Device::GPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(atanh_layer, T, El::Device::GPU);       \
-  UNARY_ETI_INST_MACRO_DEV_DT(erf_layer, T, El::Device::GPU);         \
-  UNARY_ETI_INST_MACRO_DEV_DT(erfinv_layer, T, El::Device::GPU)
-
-#define LBANN_INSTANTIATE_GPU_HALF
-#include "lbann/macros/instantiate.hpp"
-
-} // namespace lbann
diff --git a/src/models/unit_test/modify_test.cpp b/src/models/unit_test/modify_test.cpp
index b6b12ce0233..54d29500edc 100644
--- a/src/models/unit_test/modify_test.cpp
+++ b/src/models/unit_test/modify_test.cpp
@@ -29,7 +29,6 @@
 #include "MPITestHelpers.hpp"
 #include "TestHelpers.hpp"
 
-#include "lbann/layers/activations/activations.hpp"
 #include "lbann/layers/activations/relu.hpp"
 #include "lbann/layers/layer.hpp"
 #include <lbann/base.hpp>
diff --git a/src/operators/CMakeLists.txt b/src/operators/CMakeLists.txt
index 5c890a38339..d2c472154ca 100644
--- a/src/operators/CMakeLists.txt
+++ b/src/operators/CMakeLists.txt
@@ -2,6 +2,8 @@
 set(THIS_DIR_SOURCES)
 
 # Add the subdirectories
+add_subdirectory(activations)
+add_subdirectory(loss)
 add_subdirectory(math)
 
 # Propagate the files up the tree
diff --git a/src/operators/activations/CMakeLists.txt b/src/operators/activations/CMakeLists.txt
new file mode 100644
index 00000000000..b18030c4e1d
--- /dev/null
+++ b/src/operators/activations/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Add the host (C++) source files for this directory
+set_full_path(THIS_DIR_SOURCES
+  activation_builders.cpp
+  activations.cpp
+  )
+
+if (LBANN_HAS_GPU)
+  # Add the GPU (.cu) source files for this directory; only listed when LBANN_HAS_GPU is true
+  set_full_path(THIS_DIR_CU_SOURCES
+    activations.cu
+    )
+endif ()
+
+add_subdirectory(cereal_registration) # its CMakeLists sets SOURCES in this scope via PARENT_SCOPE
+
+# Propagate the host and GPU source lists to the parent directory scope
+set(SOURCES "${SOURCES}" "${THIS_DIR_SOURCES}" PARENT_SCOPE)
+set(GPU_SOURCES "${GPU_SOURCES}" "${THIS_DIR_CU_SOURCES}" PARENT_SCOPE)
diff --git a/src/layers/activations/cereal_registration/softsign.cpp b/src/operators/activations/activation_builders.cpp
similarity index 63%
rename from src/layers/activations/cereal_registration/softsign.cpp
rename to src/operators/activations/activation_builders.cpp
index 6d1f8efd62b..6f5d2c39ef0 100644
--- a/src/layers/activations/cereal_registration/softsign.cpp
+++ b/src/operators/activations/activation_builders.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,14 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/activations/activations.hpp>
 
-namespace lbann {
+#include <lbann/operators/activations/activation_builders_impl.hpp>
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-softsign_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
+#define PROTO_DEVICE(T, D)                                                     \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(log_sigmoid, T, D);                   \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(selu, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(sigmoid, T, D);                       \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(softplus, T, D);                      \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(softsign, T, D)
 
-} // namespace lbann
-
-#define LBANN_LAYER_NAME softsign_layer
-#include "lbann/macros/register_layer_with_cereal.hpp"
+#include <lbann/macros/instantiate_device.hpp>
diff --git a/src/operators/activations/activations.cpp b/src/operators/activations/activations.cpp
new file mode 100644
index 00000000000..fd049fc7855
--- /dev/null
+++ b/src/operators/activations/activations.cpp
@@ -0,0 +1,204 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/activations/activations.hpp"
+#include "../math/common.hpp"
+
+namespace lbann {
+
+namespace {
+
+// =========================================================
+// Functor objects for the entry-wise activation operators
+// =========================================================
+// Note: The unary operator() overload is the forward prop
+// step (\f$ y = f(x) \f$) and the binary operator() overload
+// is the back prop step
+// (\f$ \frac{dL}{dx} = \frac{dL}{dy} f'(x) \f$).
+
+/** Log sigmoid operator: forward is log(1/(1+exp(-x))), backward is dy/(1+exp(x)). */
+template <typename DataT>
+struct LogSigmoidOpImpl
+{
+  inline DataT operator()(DataT const& x) const noexcept
+  {
+    using std::log1p; // the unqualified calls below may also find DataT-specific overloads via ADL
+    if (x >= El::TypeTraits<DataT>::Zero()) {
+      return -log1p(El::Exp(-x)); // exp argument is <= 0 in this branch
+    }
+    else {
+      return x - log1p(El::Exp(x)); // same value, rewritten so the exp argument stays < 0
+    }
+  }
+  inline DataT operator()(DataT const& x, DataT const& dy) const noexcept
+  {
+    return dy / (El::TypeTraits<DataT>::One() + El::Exp(x)); // dy * (1 - sigmoid(x))
+  }
+};
+
+/** SELU operator: forward is scale*x for x > 0 and scale*alpha*(exp(x)-1) otherwise. */
+template <typename DataT>
+struct SeluOpImpl
+{
+  inline DataT operator()(DataT const& x) const noexcept
+  {
+    using std::expm1; // the unqualified call below may also find DataT-specific overloads via ADL
+    static auto const alpha = DataT(1.6732632423543772848170429916717);
+    static auto const scale = DataT(1.0507009873554804934193349852946);
+    static auto const zero = DataT(0.);
+    return (x > zero ? scale * x : scale * alpha * expm1(x));
+  }
+  inline DataT operator()(DataT const& x, DataT const& dy) const noexcept
+  {
+    static auto const alpha = DataT(1.6732632423543772848170429916717); // same literal as the forward overload
+    static auto const scale = DataT(1.0507009873554804934193349852946); // same literal as the forward overload
+    static auto const zero = DataT(0.);
+    return (x > zero ? dy * scale : dy * scale * alpha * El::Exp(x)); // dy * f'(x)
+  }
+};
+
+/** Sigmoid operator: y = 1/(1+exp(-x)); with LBANN_ENABLE_SIGMOID_CUTOFF, y is kept within [eps, 1-eps]. */
+template <typename DataT>
+struct SigmoidOpImpl
+{
+  DataT eps = std::numeric_limits<DataT>::epsilon(); // only read inside the LBANN_ENABLE_SIGMOID_CUTOFF sections
+  inline DataT operator()(DataT const& x) const noexcept
+  {
+    static auto const one = El::TypeTraits<DataT>::One();
+    auto const& y = one / (one + El::Exp(-x));
+#ifdef LBANN_ENABLE_SIGMOID_CUTOFF
+    if (y <= eps) {
+      return eps; // lower cutoff
+    }
+    else if (y >= one - eps) {
+      return one - eps; // upper cutoff
+    }
+#endif // LBANN_ENABLE_SIGMOID_CUTOFF
+    return y;
+  }
+  inline DataT operator()(DataT const& x, DataT const& dy) const noexcept
+  {
+    static auto const one = El::TypeTraits<DataT>::One();
+    auto const& y = one / (one + El::Exp(-x)); // recompute the forward value from x
+#ifdef LBANN_ENABLE_SIGMOID_CUTOFF
+    if (y <= eps || y >= one - eps) { // same conditions under which the forward overload takes a cutoff branch
+      return El::TypeTraits<DataT>::Zero();
+    }
+#endif // LBANN_ENABLE_SIGMOID_CUTOFF
+    return dy * y * (one - y); // dy * sigmoid'(x)
+  }
+};
+
+/** Softplus operator: forward is log(1+exp(x)), backward is dy/(1+exp(-x)). */
+template <typename DataT>
+struct SoftplusOpImpl
+{
+  inline DataT operator()(DataT const& x) const noexcept
+  {
+    using std::log1p; // the unqualified calls below may also find DataT-specific overloads via ADL
+    if (x > El::TypeTraits<DataT>::Zero()) {
+      return log1p(El::Exp(-x)) + x; // same value, rewritten so the exp argument stays < 0
+    }
+    else {
+      return log1p(El::Exp(x)); // exp argument is <= 0 in this branch
+    }
+  }
+  inline DataT operator()(DataT const& x, DataT const& dy) const noexcept
+  {
+    return dy / (El::TypeTraits<DataT>::One() + El::Exp(-x)); // dy * sigmoid(x)
+  }
+};
+
+/** Softsign operator: forward is x/(1+|x|), backward is dy/(1+|x|)^2. */
+template <typename DataT>
+struct SoftsignOpImpl
+{
+  inline DataT operator()(DataT const& x) const noexcept
+  {
+    using std::fabs; // the unqualified call below may also find DataT-specific overloads via ADL
+    return x / (El::TypeTraits<DataT>::One() + fabs(x));
+  }
+  inline DataT operator()(DataT const& x, DataT const& dy) const noexcept
+  {
+    using std::fabs;
+    auto const& denom = El::TypeTraits<DataT>::One() + fabs(x); // 1 + |x|, squared below
+    return dy / (denom * denom);
+  }
+};
+
+} // namespace
+
+// Define fp_compute_local/bp_compute_local of OP_NAME##Operator using OP_NAME##OpImpl (explicit instantiation is done by PROTO below)
+#define DEFINE_COMPUTE_OPS(OP_NAME)                                            \
+  template <typename DataT, El::Device Device>                                 \
+  void OP_NAME##Operator<DataT, Device>::fp_compute_local(                     \
+    std::vector<ConstLocalInputTensorType> inputs,                             \
+    std::vector<LocalOutputTensorType> outputs) const                          \
+  {                                                                            \
+    LBANN_ASSERT_DEBUG(inputs.size() == 1);                                    \
+    LBANN_ASSERT_DEBUG(outputs.size() == 1);                                   \
+    auto const& input = inputs.front().data();                                 \
+    auto& output = outputs.front().data();                                     \
+    El::EntrywiseMap(                                                          \
+      input,                                                                   \
+      output,                                                                  \
+      std::function<DataT(DataT const&)>(OP_NAME##OpImpl<DataT>{}));           \
+  }                                                                            \
+  template <typename DataT, El::Device Device>                                 \
+  void OP_NAME##Operator<DataT, Device>::bp_compute_local(                     \
+    std::vector<ConstLocalInputTensorType> inputs,                             \
+    std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,                 \
+    std::vector<LocalInputTensorType> grads_wrt_inputs) const                  \
+  {                                                                            \
+    LBANN_ASSERT_DEBUG(inputs.size() == 1);                                    \
+    LBANN_ASSERT_DEBUG(grads_wrt_outputs.size() == 1);                         \
+    LBANN_ASSERT_DEBUG(grads_wrt_inputs.size() == 1);                          \
+    auto const& input = inputs.front().data();                                 \
+    auto const& grad_wrt_output = grads_wrt_outputs.front().data();            \
+    auto& grad_wrt_input = grads_wrt_inputs.front().data();                    \
+    internal::EntrywiseZipInto(input,                                          \
+                               grad_wrt_output,                                \
+                               grad_wrt_input,                                 \
+                               OP_NAME##OpImpl<DataT>{});                      \
+  }
+
+DEFINE_COMPUTE_OPS(LogSigmoid)
+DEFINE_COMPUTE_OPS(Selu)
+DEFINE_COMPUTE_OPS(Sigmoid)
+DEFINE_COMPUTE_OPS(Softplus)
+DEFINE_COMPUTE_OPS(Softsign)
+
+#define PROTO(T)                                                               \
+  template class LogSigmoidOperator<T, El::Device::CPU>;                       \
+  template class SeluOperator<T, El::Device::CPU>;                             \
+  template class SigmoidOperator<T, El::Device::CPU>;                          \
+  template class SoftplusOperator<T, El::Device::CPU>;                         \
+  template class SoftsignOperator<T, El::Device::CPU>
+
+#define LBANN_INSTANTIATE_CPU_HALF
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/operators/activations/activations.cu b/src/operators/activations/activations.cu
new file mode 100644
index 00000000000..68c63c2b1d2
--- /dev/null
+++ b/src/operators/activations/activations.cu
@@ -0,0 +1,197 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/activations/activations.hpp"
+
+#include "lbann/base.hpp"
+#include "lbann/utils/gpu/helpers.hpp"
+
+#include "../math/common.cuh"
+
+namespace lbann {
+
+namespace {
+
+// =========================================================
+// Device functor objects for the entry-wise activation operators
+// =========================================================
+// Note: The unary operator() overload is the forward prop
+// step (\f$ y = f(x) \f$) and the binary operator() overload
+// is the back prop step
+// (\f$ \frac{dL}{dx} = \frac{dL}{dy} f'(x) \f$).
+
+/** Log sigmoid operator (device): forward is log(1/(1+exp(-x))), backward is dy/(1+exp(x)). */
+template <typename DataT>
+struct LogSigmoidOpImpl {
+  inline __device__ DataT operator()(DataT const& x) const noexcept
+  {
+    if (x >= DataT(0.0)) {
+      return -gpu_lib::log1p(gpu_lib::exp(-x)); // exp argument is <= 0 in this branch
+    } else {
+      return x - gpu_lib::log1p(gpu_lib::exp(x)); // same value, rewritten so the exp argument stays < 0
+    }
+  }
+  inline __device__ DataT operator()(DataT const& x,
+                                     DataT const& dy) const noexcept
+  {
+    return dy / (DataT(1.0) + gpu_lib::exp(x)); // dy * (1 - sigmoid(x))
+  }
+};
+
+/** SELU operator (device): forward is scale*x for x > 0 and scale*alpha*(exp(x)-1) otherwise. */
+template <typename DataT>
+struct SeluOpImpl {
+  inline __device__ DataT operator()(DataT const& x) const noexcept
+  {
+    DataT const alpha = 1.6732632423543772848170429916717;
+    DataT const scale = 1.0507009873554804934193349852946;
+    return (x > DataT(0.0) ?
+            scale * x :
+            scale * alpha * gpu_lib::expm1(x));
+  }
+  inline __device__ DataT operator()(DataT const& x,
+                                     DataT const& dy) const noexcept
+  {
+    DataT const alpha = 1.6732632423543772848170429916717; // same literal as the forward overload
+    DataT const scale = 1.0507009873554804934193349852946; // same literal as the forward overload
+    return (x > DataT(0.0) ?
+            dy * scale :
+            dy * scale * alpha * gpu_lib::exp(x)); // dy * f'(x)
+  }
+};
+
+/** Sigmoid operator (device): y = 1/(1+exp(-x)); with LBANN_ENABLE_SIGMOID_CUTOFF, y is kept within [eps, 1-eps]. */
+template <typename DataT>
+struct SigmoidOpImpl {
+  inline __device__ DataT operator()(DataT const& x) const noexcept
+  {
+    DataT const one = 1.;
+    auto const& y = one / (one + gpu_lib::exp(-x));
+#ifdef LBANN_ENABLE_SIGMOID_CUTOFF
+    auto const eps = gpu_lib::epsilon<DataT>();
+    if (y <= eps) { return eps; } // lower cutoff
+    else if (y >= one - eps) { return one - eps; } // upper cutoff
+#endif // LBANN_ENABLE_SIGMOID_CUTOFF
+    return y;
+  }
+  inline __device__ DataT operator()(DataT const& x,
+                                     DataT const& dy) const noexcept
+  {
+    DataT const one = 1.;
+    auto const& y = one / (one + gpu_lib::exp(-x)); // recompute the forward value from x
+#ifdef LBANN_ENABLE_SIGMOID_CUTOFF
+    auto const eps = gpu_lib::epsilon<DataT>();
+    if (y <= eps || y >= one - eps) { return DataT(0.0); } // same conditions as the forward cutoff branches
+#endif // LBANN_ENABLE_SIGMOID_CUTOFF
+    return dy * y * (one - y); // dy * sigmoid'(x)
+  }
+};
+
+/** Softplus operator (device): forward is log(1+exp(x)), backward is dy/(1+exp(-x)). */
+template <typename DataT>
+struct SoftplusOpImpl {
+  inline __device__ DataT operator()(DataT const& x) const noexcept
+  {
+    if (x > DataT(0.0)) {
+      return gpu_lib::log1p(gpu_lib::exp(-x)) + x; // same value, rewritten so the exp argument stays < 0
+    } else {
+      return gpu_lib::log1p(gpu_lib::exp(x)); // exp argument is <= 0 in this branch
+    }
+  }
+  inline __device__ DataT operator()(DataT const& x,
+                                     DataT const& dy) const noexcept
+  {
+    return dy / (DataT(1.0) + gpu_lib::exp(-x)); // dy * sigmoid(x)
+  }
+};
+
+/** Softsign operator (device): forward is x/(1+|x|), backward is dy/(1+|x|)^2. */
+template <typename DataT>
+struct SoftsignOpImpl {
+  inline __device__ DataT operator()(DataT const& x) const noexcept
+  {
+    return x / (DataT(1.0) + gpu_lib::abs(x));
+  }
+  inline __device__ DataT operator()(DataT const& x,
+                                     DataT const& dy) const noexcept
+  {
+    auto const& denom = DataT(1.0) + gpu_lib::abs(x); // 1 + |x|, squared below
+    return dy / (denom * denom);
+  }
+};
+
+} // namespace
+
+// Define fp_compute_local/bp_compute_local of OP_NAME##Operator using OP_NAME##OpImpl (explicit instantiation is done by PROTO below)
+#define DEFINE_COMPUTE_OPS(OP_NAME)                                     \
+  template <typename DataT, El::Device Device>                          \
+  void OP_NAME##Operator<DataT, Device>::fp_compute_local(              \
+    std::vector<ConstLocalInputTensorType> inputs,                      \
+    std::vector<LocalOutputTensorType> outputs) const                   \
+  {                                                                     \
+    LBANN_ASSERT_DEBUG(inputs.size() == 1);                             \
+    LBANN_ASSERT_DEBUG(outputs.size() == 1);                            \
+    auto const& input = inputs.front().data();                          \
+    auto& output = outputs.front().data();                              \
+    El::EntrywiseMap(input,                                             \
+                     output,                                            \
+                     OP_NAME##OpImpl<DataT>{});                         \
+  }                                                                     \
+  template <typename DataT, El::Device Device>                          \
+  void OP_NAME##Operator<DataT, Device>::bp_compute_local(              \
+    std::vector<ConstLocalInputTensorType> inputs,                      \
+    std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,          \
+    std::vector<LocalInputTensorType> grads_wrt_inputs) const           \
+  {                                                                     \
+    LBANN_ASSERT_DEBUG(inputs.size() == 1);                             \
+    LBANN_ASSERT_DEBUG(grads_wrt_outputs.size() == 1);                  \
+    LBANN_ASSERT_DEBUG(grads_wrt_inputs.size() == 1);                   \
+    auto const& input = inputs.front().data();                          \
+    auto const& grad_wrt_output = grads_wrt_outputs.front().data();     \
+    auto& grad_wrt_input = grads_wrt_inputs.front().data();             \
+    internal::EntrywiseZipInto(input,                                   \
+                               grad_wrt_output,                         \
+                               grad_wrt_input,                          \
+                               OP_NAME##OpImpl<DataT>{});               \
+  }
+
+DEFINE_COMPUTE_OPS(LogSigmoid)
+DEFINE_COMPUTE_OPS(Selu)
+DEFINE_COMPUTE_OPS(Sigmoid)
+DEFINE_COMPUTE_OPS(Softplus)
+DEFINE_COMPUTE_OPS(Softsign)
+
+#define PROTO(T)                                                \
+  template class LogSigmoidOperator<T, El::Device::GPU>;        \
+  template class SeluOperator<T, El::Device::GPU>;              \
+  template class SigmoidOperator<T, El::Device::GPU>;           \
+  template class SoftplusOperator<T, El::Device::GPU>;          \
+  template class SoftsignOperator<T, El::Device::GPU>
+
+#define LBANN_INSTANTIATE_GPU_HALF
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/operators/activations/cereal_registration/CMakeLists.txt b/src/operators/activations/cereal_registration/CMakeLists.txt
new file mode 100644
index 00000000000..b3be3a148c9
--- /dev/null
+++ b/src/operators/activations/cereal_registration/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Add the cereal registration sources for this directory (one file per activation operator)
+set_full_path(THIS_DIR_SOURCES
+  log_sigmoid.cpp
+  selu.cpp
+  sigmoid.cpp
+  softplus.cpp
+  softsign.cpp
+  )
+
+# Propagate the source list to the parent directory scope
+set(SOURCES "${SOURCES}" "${THIS_DIR_SOURCES}" PARENT_SCOPE)
diff --git a/src/operators/activations/cereal_registration/log_sigmoid.cpp b/src/operators/activations/cereal_registration/log_sigmoid.cpp
new file mode 100644
index 00000000000..b2540955771
--- /dev/null
+++ b/src/operators/activations/cereal_registration/log_sigmoid.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/activations/activations.hpp"
+
+#define LBANN_OPERATOR_NAME LogSigmoidOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/activations/cereal_registration/selu.cpp b/src/operators/activations/cereal_registration/selu.cpp
new file mode 100644
index 00000000000..cf361411a53
--- /dev/null
+++ b/src/operators/activations/cereal_registration/selu.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/activations/activations.hpp"
+
+#define LBANN_OPERATOR_NAME SeluOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/activations/cereal_registration/sigmoid.cpp b/src/operators/activations/cereal_registration/sigmoid.cpp
new file mode 100644
index 00000000000..fe6cf3c0229
--- /dev/null
+++ b/src/operators/activations/cereal_registration/sigmoid.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/activations/activations.hpp"
+
+#define LBANN_OPERATOR_NAME SigmoidOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/activations/cereal_registration/softplus.cpp b/src/operators/activations/cereal_registration/softplus.cpp
new file mode 100644
index 00000000000..866c4efe692
--- /dev/null
+++ b/src/operators/activations/cereal_registration/softplus.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/activations/activations.hpp"
+
+#define LBANN_OPERATOR_NAME SoftplusOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/activations/cereal_registration/softsign.cpp b/src/operators/activations/cereal_registration/softsign.cpp
new file mode 100644
index 00000000000..66ecf9e3b24
--- /dev/null
+++ b/src/operators/activations/cereal_registration/softsign.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/activations/activations.hpp"
+
+#define LBANN_OPERATOR_NAME SoftsignOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/loss/CMakeLists.txt b/src/operators/loss/CMakeLists.txt
new file mode 100644
index 00000000000..709a559ab8e
--- /dev/null
+++ b/src/operators/loss/CMakeLists.txt
@@ -0,0 +1,18 @@
+# C++ source files that live directly in this directory
+set_full_path(THIS_DIR_SOURCES
+  entrywise.cpp
+  loss_builders.cpp
+  )
+
+if (LBANN_HAS_GPU)
+  # GPU source files in this directory; collected only when LBANN_HAS_GPU is set
+  set_full_path(THIS_DIR_CU_SOURCES
+    entrywise.cu
+    )
+endif ()
+
+add_subdirectory(cereal_registration)
+
+# Append this directory's files to SOURCES / GPU_SOURCES in the parent scope
+set(SOURCES "${SOURCES}" "${THIS_DIR_SOURCES}" PARENT_SCOPE)
+set(GPU_SOURCES "${GPU_SOURCES}" "${THIS_DIR_CU_SOURCES}" PARENT_SCOPE)
diff --git a/src/operators/loss/cereal_registration/CMakeLists.txt b/src/operators/loss/cereal_registration/CMakeLists.txt
new file mode 100644
index 00000000000..993da4512d8
--- /dev/null
+++ b/src/operators/loss/cereal_registration/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Cereal registration sources: one .cpp per loss operator listed here
+set_full_path(THIS_DIR_SOURCES
+  binary_cross_entropy.cpp
+  boolean_accuracy.cpp
+  boolean_false_negative.cpp
+  boolean_false_positive.cpp
+  sigmoid_binary_cross_entropy.cpp
+  )
+
+# Append these files to SOURCES in the parent scope
+set(SOURCES "${SOURCES}" "${THIS_DIR_SOURCES}" PARENT_SCOPE)
diff --git a/src/operators/loss/cereal_registration/binary_cross_entropy.cpp b/src/operators/loss/cereal_registration/binary_cross_entropy.cpp
new file mode 100644
index 00000000000..6cf98330e74
--- /dev/null
+++ b/src/operators/loss/cereal_registration/binary_cross_entropy.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/loss/entrywise.hpp"
+
+#define LBANN_OPERATOR_NAME BinaryCrossEntropyOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/loss/cereal_registration/boolean_accuracy.cpp b/src/operators/loss/cereal_registration/boolean_accuracy.cpp
new file mode 100644
index 00000000000..a5101b82459
--- /dev/null
+++ b/src/operators/loss/cereal_registration/boolean_accuracy.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/loss/entrywise.hpp"
+
+#define LBANN_OPERATOR_NAME BooleanAccuracyOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/loss/cereal_registration/boolean_false_negative.cpp b/src/operators/loss/cereal_registration/boolean_false_negative.cpp
new file mode 100644
index 00000000000..b3a7120af15
--- /dev/null
+++ b/src/operators/loss/cereal_registration/boolean_false_negative.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/loss/entrywise.hpp"
+
+#define LBANN_OPERATOR_NAME BooleanFalseNegativeOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/loss/cereal_registration/boolean_false_positive.cpp b/src/operators/loss/cereal_registration/boolean_false_positive.cpp
new file mode 100644
index 00000000000..e5271ceaca5
--- /dev/null
+++ b/src/operators/loss/cereal_registration/boolean_false_positive.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/loss/entrywise.hpp"
+
+#define LBANN_OPERATOR_NAME BooleanFalsePositiveOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/loss/cereal_registration/sigmoid_binary_cross_entropy.cpp b/src/operators/loss/cereal_registration/sigmoid_binary_cross_entropy.cpp
new file mode 100644
index 00000000000..1f187130902
--- /dev/null
+++ b/src/operators/loss/cereal_registration/sigmoid_binary_cross_entropy.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/loss/entrywise.hpp"
+
+#define LBANN_OPERATOR_NAME SigmoidBinaryCrossEntropyOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/loss/entrywise.cpp b/src/operators/loss/entrywise.cpp
new file mode 100644
index 00000000000..898cd2b0983
--- /dev/null
+++ b/src/operators/loss/entrywise.cpp
@@ -0,0 +1,246 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/loss/entrywise.hpp"
+#include "../math/common.hpp"
+
+namespace lbann {
+
+namespace {
+
+// =========================================================
+// Functor objects for entry-wise binary loss operators
+// =========================================================
+// Note: Binary operator corresponds to forward prop step
+// (\f$ y = f(x_1,x_2) \f$) and 5-ary operator corresponds
+// to back prop step
+// (\f$ \frac{dL}{dx_i} = \frac{dL}{dy} \frac{df}{dx_i}(x_1,x_2) \f$).
+
+/** @brief Entry-wise binary cross entropy functor.
+ *  Forward value: y = -x2*log(x1) - (1-x2)*log(1-x1), where a term is only
+ *  accumulated when its weight (x2 or 1-x2) is strictly positive.
+ */
+template <typename DataT>
+struct BinaryCrossEntropyOpImpl
+{
+  /** Forward prop: returns the loss for operands x1 and x2. */
+  inline DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    static auto const kZero = El::TypeTraits<DataT>::Zero();
+    static auto const kOne = El::TypeTraits<DataT>::One();
+    DataT loss = kZero;
+    if (x2 > kZero)
+      loss += -x2 * std::log(x1);
+    if (x2 < kOne)
+      loss += -(kOne - x2) * std::log(kOne - x1);
+    return loss;
+  }
+  /** Back prop: stores dL/dx1 in dx1 and dL/dx2 in dx2, scaled by dy. */
+  inline void operator()(DataT const& x1, DataT const& x2, DataT const& dy,
+                         DataT& dx1, DataT& dx2) const noexcept
+  {
+    static auto const kZero = El::TypeTraits<DataT>::Zero();
+    static auto const kOne = El::TypeTraits<DataT>::One();
+    dx1 = kZero;
+    dx2 = kZero;
+    if (dy == kZero)
+      return; // nothing to propagate; both gradients stay zero
+    if (x2 > kZero) {
+      dx1 += -x2 / x1 * dy;
+      dx2 += -std::log(x1) * dy;
+    }
+    if (x2 < kOne) {
+      dx1 += (kOne - x2) / (kOne - x1) * dy;
+      dx2 += std::log(kOne - x1) * dy;
+    }
+  }
+};
+
+/** @brief Entry-wise sigmoid binary cross entropy functor.
+ *  Matches applying a sigmoid to the first operand and then evaluating
+ *  binary cross entropy against the second operand. The numerically
+ *  stable formulation follows
+ *  https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits.
+ */
+template <typename DataT>
+struct SigmoidBinaryCrossEntropyOpImpl
+{
+  /** Forward prop: loss for logit x1 and target x2 clamped to [0,1]. */
+  inline DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    using std::exp;
+    using std::log1p;
+    static auto const kZero = El::TypeTraits<DataT>::Zero();
+    static auto const kOne = El::TypeTraits<DataT>::One();
+    auto const& target = std::max(kZero, std::min(x2, kOne));
+    // Pick the branch whose exp() argument is non-positive.
+    if (x1 > kZero)
+      return (kOne - target) * x1 + log1p(exp(-x1));
+    return -x1 * target + log1p(exp(x1));
+  }
+  /** Back prop: stores dL/dx1 in dx1 and dL/dx2 in dx2, scaled by dy. */
+  inline void operator()(DataT const& x1, DataT const& x2, DataT const& dy,
+                         DataT& dx1, DataT& dx2) const noexcept
+  {
+    using std::exp;
+    static auto const kZero = El::TypeTraits<DataT>::Zero();
+    static auto const kOne = El::TypeTraits<DataT>::One();
+    auto const& target = std::max(kZero, std::min(x2, kOne));
+    // Both branches evaluate sigmoid(x1) - target.
+    if (x1 > kZero)
+      dx1 = -target + kOne / (kOne + exp(-x1));
+    else
+      dx1 = kOne - target - kOne / (kOne + exp(x1));
+    dx1 *= dy;
+    // x2 only receives a gradient when the clamp left it unchanged,
+    // i.e. when it already lies in [0,1].
+    if (x2 == target)
+      dx2 = -x1 * dy;
+    else
+      dx2 = kZero;
+  }
+};
+
+/** @brief Entry-wise Boolean accuracy functor (threshold 0.5). */
+template <typename DataT>
+struct BooleanAccuracyOpImpl
+{
+  /** Forward prop: one when x1 and x2 threshold to the same Boolean. */
+  inline DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    bool const x1_set = (x1 >= DataT(0.5));
+    bool const x2_set = (x2 >= DataT(0.5));
+    if (x1_set == x2_set)
+      return El::TypeTraits<DataT>::One();
+    return El::TypeTraits<DataT>::Zero();
+  }
+  /** Back prop: both input gradients are set to zero. */
+  inline void operator()(DataT const& x1, DataT const& x2, DataT const& dy,
+                         DataT& dx1, DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = El::TypeTraits<DataT>::Zero();
+  }
+};
+
+/** @brief Entry-wise Boolean false-negative functor (threshold 0.5). */
+template <typename DataT>
+struct BooleanFalseNegativeOpImpl
+{
+  /** Forward prop: one when x2 reaches 0.5 but x1 does not, else zero. */
+  inline DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    bool const x1_set = (x1 >= DataT(0.5));
+    bool const x2_set = (x2 >= DataT(0.5));
+    if (x2_set && !x1_set)
+      return El::TypeTraits<DataT>::One();
+    return El::TypeTraits<DataT>::Zero();
+  }
+  /** Back prop: both input gradients are set to zero. */
+  inline void operator()(DataT const& x1, DataT const& x2, DataT const& dy,
+                         DataT& dx1, DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = El::TypeTraits<DataT>::Zero();
+  }
+};
+
+/** @brief Entry-wise Boolean false-positive functor (threshold 0.5). */
+template <typename DataT>
+struct BooleanFalsePositiveOpImpl
+{
+  /** Forward prop: one when x1 reaches 0.5 but x2 does not, else zero. */
+  inline DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    bool const x1_set = (x1 >= DataT(0.5));
+    bool const x2_set = (x2 >= DataT(0.5));
+    if (x1_set && !x2_set)
+      return El::TypeTraits<DataT>::One();
+    return El::TypeTraits<DataT>::Zero();
+  }
+  /** Back prop: both input gradients are set to zero. */
+  inline void operator()(DataT const& x1, DataT const& x2, DataT const& dy,
+                         DataT& dx1, DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = El::TypeTraits<DataT>::Zero();
+  }
+};
+
+} // namespace
+
+/* Emits the out-of-line definitions of fp_compute_local and
+ * bp_compute_local for OP_NAME##Operator. Each checks the expected tensor
+ * counts with LBANN_ASSERT_DEBUG and then hands the tensors' data() plus
+ * an OP_NAME##OpImpl<DataT> functor to the matching internal:: helper.
+ */
+#define DEFINE_COMPUTE_OPS(OP_NAME)                                            \
+  template <typename DataT, El::Device Device>                                 \
+  void OP_NAME##Operator<DataT, Device>::fp_compute_local(                     \
+    std::vector<ConstLocalInputTensorType> inputs,                             \
+    std::vector<LocalOutputTensorType> outputs) const                          \
+  {                                                                            \
+    LBANN_ASSERT_DEBUG(inputs.size() == 2);                                    \
+    LBANN_ASSERT_DEBUG(outputs.size() == 1);                                   \
+    auto const& x1 = inputs[0].data();                                         \
+    auto const& x2 = inputs[1].data();                                         \
+    auto& y = outputs.front().data();                                          \
+    internal::EntrywiseZipInto(x1, x2, y,                                      \
+                               OP_NAME##OpImpl<DataT>{});                      \
+  }                                                                            \
+  template <typename DataT, El::Device Device>                                 \
+  void OP_NAME##Operator<DataT, Device>::bp_compute_local(                     \
+    std::vector<ConstLocalInputTensorType> inputs,                             \
+    std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,                 \
+    std::vector<LocalInputTensorType> grads_wrt_inputs) const                  \
+  {                                                                            \
+    LBANN_ASSERT_DEBUG(inputs.size() == 2);                                    \
+    LBANN_ASSERT_DEBUG(grads_wrt_outputs.size() == 1);                         \
+    LBANN_ASSERT_DEBUG(grads_wrt_inputs.size() == 2);                          \
+    auto const& x1 = inputs[0].data();                                         \
+    auto const& x2 = inputs[1].data();                                         \
+    auto const& dy = grads_wrt_outputs.front().data();                         \
+    auto& dx1 = grads_wrt_inputs[0].data();                                    \
+    auto& dx2 = grads_wrt_inputs[1].data();                                    \
+    internal::apply_binary_backprop_operator(x1, x2, dy,                       \
+                                             dx1, dx2,                         \
+                                             OP_NAME##OpImpl<DataT>{});        \
+  }
+
+DEFINE_COMPUTE_OPS(BinaryCrossEntropy)
+DEFINE_COMPUTE_OPS(SigmoidBinaryCrossEntropy)
+DEFINE_COMPUTE_OPS(BooleanAccuracy)
+DEFINE_COMPUTE_OPS(BooleanFalseNegative)
+DEFINE_COMPUTE_OPS(BooleanFalsePositive)
+
+#define PROTO(T)                                                               \
+  template class BinaryCrossEntropyOperator<T, El::Device::CPU>;               \
+  template class SigmoidBinaryCrossEntropyOperator<T, El::Device::CPU>;        \
+  template class BooleanAccuracyOperator<T, El::Device::CPU>;                  \
+  template class BooleanFalseNegativeOperator<T, El::Device::CPU>;             \
+  template class BooleanFalsePositiveOperator<T, El::Device::CPU>
+// NOTE(review): instantiate.hpp presumably expands PROTO(T) per enabled type
+#define LBANN_INSTANTIATE_CPU_HALF
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/operators/loss/entrywise.cu b/src/operators/loss/entrywise.cu
new file mode 100644
index 00000000000..1e15ddf82f2
--- /dev/null
+++ b/src/operators/loss/entrywise.cu
@@ -0,0 +1,243 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/loss/entrywise.hpp"
+#include "lbann/utils/gpu/helpers.hpp"
+
+#include "../math/common.cuh"
+
+namespace lbann {
+
+namespace {
+
+// =========================================================
+// Functor objects for entry-wise binary loss operators
+// =========================================================
+// Note: Binary operator corresponds to forward prop step
+// (\f$ y = f(x_1,x_2) \f$) and 5-ary operator corresponds
+// to back prop step
+// (\f$ \frac{dL}{dx_i} = \frac{dL}{dy} \frac{df}{dx_i}(x_1,x_2) \f$).
+
+/** @brief Entry-wise binary cross entropy functor (device code). */
+template <typename DataT>
+struct BinaryCrossEntropyOpImpl
+{
+  /** Forward prop: -x2*log(x1) - (1-x2)*log(1-x1), terms gated on weight>0. */
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    DataT const kZero = 0.;
+    DataT const kOne = 1.;
+    DataT loss = kZero;
+    if (x2 > kZero)
+      loss += -x2 * gpu_lib::log(x1);
+    if (x2 < kOne)
+      loss += -(kOne - x2) * gpu_lib::log(kOne - x1);
+    return loss;
+  }
+  /** Back prop: stores dL/dx1 in dx1 and dL/dx2 in dx2, scaled by dy. */
+  inline __device__ void operator()(DataT const& x1, DataT const& x2,
+    DataT const& dy, DataT& dx1, DataT& dx2) const
+  {
+    DataT const kZero = 0.;
+    DataT const kOne = 1.;
+    dx1 = kZero;
+    dx2 = kZero;
+    if (dy == kZero) return;
+    if (x2 > kZero) {
+      dx1 += -x2 / x1 * dy;
+      dx2 += -gpu_lib::log(x1) * dy;
+    }
+    if (x2 < kOne) {
+      dx1 += (kOne - x2) / (kOne - x1) * dy;
+      dx2 += gpu_lib::log(kOne - x1) * dy;
+    }
+  }
+};
+
+/** @brief Entry-wise sigmoid binary cross entropy functor (device code).
+ *  Matches applying a sigmoid to the first operand and then evaluating
+ *  binary cross entropy against the second operand. The numerically
+ *  stable formulation follows
+ *  https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits.
+ */
+template <typename DataT>
+struct SigmoidBinaryCrossEntropyOpImpl
+{
+  /** Forward prop: loss for logit x1 and target x2 clamped to [0,1]. */
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    DataT const kZero = 0.;
+    DataT const kOne = 1.;
+    auto const& target = gpu_lib::max(kZero, gpu_lib::min(x2, kOne));
+    if (x1 > kZero)
+      return (kOne - target) * x1 + gpu_lib::log1p(gpu_lib::exp(-x1));
+    return - x1 * target + gpu_lib::log1p(gpu_lib::exp(x1));
+  }
+  /** Back prop: stores dL/dx1 in dx1 and dL/dx2 in dx2, scaled by dy. */
+  inline __device__ void operator()(DataT const& x1, DataT const& x2,
+    DataT const& dy, DataT& dx1, DataT& dx2) const
+  {
+    DataT const kZero = 0.;
+    DataT const kOne = 1.;
+    auto const& target = gpu_lib::max(kZero, gpu_lib::min(x2, kOne));
+    // Both branches evaluate sigmoid(x1) - target.
+    if (x1 > kZero)
+      dx1 = -target + kOne / (kOne + gpu_lib::exp(-x1));
+    else
+      dx1 = kOne - target - kOne / (kOne + gpu_lib::exp(x1));
+    dx1 *= dy;
+    // x2 gets a gradient only if the clamp left it unchanged (x2 in [0,1]).
+    if (x2 == target)
+      dx2 = -x1 * dy;
+    else
+      dx2 = kZero;
+  }
+};
+
+/** @brief Entry-wise Boolean accuracy functor (device code, threshold 0.5). */
+template <typename DataT>
+struct BooleanAccuracyOpImpl
+{
+  /** Forward prop: one when x1 and x2 threshold to the same Boolean. */
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    bool const x1_set = (x1 >= DataT(0.5));
+    bool const x2_set = (x2 >= DataT(0.5));
+    if (x1_set == x2_set)
+      return DataT(1.0);
+    return DataT(0.0);
+  }
+  /** Back prop: both input gradients are set to zero. */
+  inline __device__ void operator()(DataT const& x1, DataT const& x2,
+    DataT const& dy, DataT& dx1, DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+/** @brief Entry-wise Boolean false-negative functor (device, threshold 0.5). */
+template <typename DataT>
+struct BooleanFalseNegativeOpImpl
+{
+  /** Forward prop: one when x2 reaches 0.5 but x1 does not, else zero. */
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    bool const x1_set = (x1 >= DataT(0.5));
+    bool const x2_set = (x2 >= DataT(0.5));
+    if (x2_set && !x1_set)
+      return DataT(1.0);
+    return DataT(0.0);
+  }
+  /** Back prop: both input gradients are set to zero. */
+  inline __device__ void operator()(DataT const& x1, DataT const& x2,
+    DataT const& dy, DataT& dx1, DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+/** @brief Entry-wise Boolean false-positive functor (device, threshold 0.5). */
+template <typename DataT>
+struct BooleanFalsePositiveOpImpl
+{
+  /** Forward prop: one when x1 reaches 0.5 but x2 does not, else zero. */
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    bool const x1_set = (x1 >= DataT(0.5));
+    bool const x2_set = (x2 >= DataT(0.5));
+    if (x1_set && !x2_set)
+      return DataT(1.0);
+    return DataT(0.0);
+  }
+  /** Back prop: both input gradients are set to zero. */
+  inline __device__ void operator()(DataT const& x1, DataT const& x2,
+    DataT const& dy, DataT& dx1, DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+} // namespace
+
+// Define fp/bp_compute_local per operator by applying its OpImpl functor
+#define DEFINE_COMPUTE_OPS(OP_NAME)                                     \
+  template <typename DataT, El::Device Device>                          \
+  void OP_NAME##Operator<DataT, Device>::fp_compute_local(              \
+    std::vector<ConstLocalInputTensorType> inputs,                      \
+    std::vector<LocalOutputTensorType> outputs) const                   \
+  {                                                                     \
+    LBANN_ASSERT_DEBUG(inputs.size() == 2);                             \
+    LBANN_ASSERT_DEBUG(outputs.size() == 1);                            \
+    auto const& input0 = inputs[0].data();                              \
+    auto const& input1 = inputs[1].data();                              \
+    auto& output = outputs.front().data();                              \
+    internal::EntrywiseZipInto(input0,                                  \
+                               input1,                                  \
+                               output,                                  \
+                               OP_NAME##OpImpl<DataT>{});               \
+  }                                                                     \
+  template <typename DataT, El::Device Device>                          \
+  void OP_NAME##Operator<DataT, Device>::bp_compute_local(              \
+    std::vector<ConstLocalInputTensorType> inputs,                      \
+    std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,          \
+    std::vector<LocalInputTensorType> grads_wrt_inputs) const           \
+  {                                                                     \
+    LBANN_ASSERT_DEBUG(inputs.size() == 2);                             \
+    LBANN_ASSERT_DEBUG(grads_wrt_outputs.size() == 1);                  \
+    LBANN_ASSERT_DEBUG(grads_wrt_inputs.size() == 2);                   \
+    auto const& input0 = inputs[0].data();                              \
+    auto const& input1 = inputs[1].data();                              \
+    auto const& grad_wrt_output = grads_wrt_outputs.front().data();     \
+    auto& grad_wrt_input0 = grads_wrt_inputs[0].data();                 \
+    auto& grad_wrt_input1 = grads_wrt_inputs[1].data();                 \
+    internal::apply_binary_backprop_operator(input0,                    \
+                                             input1,                    \
+                                             grad_wrt_output,           \
+                                             grad_wrt_input0,           \
+                                             grad_wrt_input1,           \
+                                             OP_NAME##OpImpl<DataT>{}); \
+  }
+
+DEFINE_COMPUTE_OPS(BinaryCrossEntropy)
+DEFINE_COMPUTE_OPS(SigmoidBinaryCrossEntropy)
+DEFINE_COMPUTE_OPS(BooleanAccuracy)
+DEFINE_COMPUTE_OPS(BooleanFalseNegative)
+DEFINE_COMPUTE_OPS(BooleanFalsePositive)
+
+#define PROTO(T)                                                        \
+  template class BinaryCrossEntropyOperator<T, El::Device::GPU>;        \
+  template class SigmoidBinaryCrossEntropyOperator<T, El::Device::GPU>; \
+  template class BooleanAccuracyOperator<T, El::Device::GPU>;           \
+  template class BooleanFalseNegativeOperator<T, El::Device::GPU>;      \
+  template class BooleanFalsePositiveOperator<T, El::Device::GPU>
+// NOTE(review): instantiate.hpp presumably expands PROTO(T) per enabled type
+#define LBANN_INSTANTIATE_GPU_HALF
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/layers/activations/cereal_registration/softplus.cpp b/src/operators/loss/loss_builders.cpp
similarity index 63%
rename from src/layers/activations/cereal_registration/softplus.cpp
rename to src/operators/loss/loss_builders.cpp
index e44cc24b430..27484e65b7f 100644
--- a/src/layers/activations/cereal_registration/softplus.cpp
+++ b/src/operators/loss/loss_builders.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,14 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/activations/activations.hpp>
 
-namespace lbann {
+#include <lbann/operators/loss/loss_builders_impl.hpp>
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-softplus_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
+#define PROTO_DEVICE(T, D) \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(binary_cross_entropy, T, D); \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(boolean_accuracy, T, D); \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(boolean_false_negative, T, D); \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(boolean_false_positive, T, D); \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(sigmoid_binary_cross_entropy, T, D)
 
-} // namespace lbann
-
-#define LBANN_LAYER_NAME softplus_layer
-#include "lbann/macros/register_layer_with_cereal.hpp"
+#include <lbann/macros/instantiate_device.hpp>
diff --git a/src/operators/math/CMakeLists.txt b/src/operators/math/CMakeLists.txt
index 05e2f4052ec..99708065416 100644
--- a/src/operators/math/CMakeLists.txt
+++ b/src/operators/math/CMakeLists.txt
@@ -1,13 +1,23 @@
 # Add the source files for this directory
 set_full_path(THIS_DIR_SOURCES
+  common.hpp
+
+  abs.cpp
+  binary.cpp
   clamp.cpp
   math_builders.cpp
+  unary.cpp
   )
 
 if (LBANN_HAS_GPU)
   # Add the CUDA source files for this directory
   set_full_path(THIS_DIR_CU_SOURCES
+    common.cuh
+
+    abs.cu
+    binary.cu
     clamp.cu
+    unary.cu
     )
 endif ()
 
diff --git a/src/operators/math/abs.cpp b/src/operators/math/abs.cpp
new file mode 100644
index 00000000000..b010515f9d2
--- /dev/null
+++ b/src/operators/math/abs.cpp
@@ -0,0 +1,114 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/abs.hpp"
+
+#include "common.hpp"
+#include <hydrogen/meta/TypeTraits.hpp>
+
+namespace lbann {
+
+namespace {
+
+/** Real-valued back prop for abs: dx is dy, -dy or 0 by the sign of x. */
+template <typename RealT>
+void do_abs_bp(El::Matrix<RealT, El::Device::CPU> const& x,
+               El::Matrix<RealT, El::Device::CPU> const& gradient_wrt_output,
+               El::Matrix<RealT, El::Device::CPU>& gradient_wrt_input)
+{
+  auto const sign_scaled = [](RealT const& xi, RealT const& dy) -> RealT {
+    RealT const zero = El::TypeTraits<RealT>::Zero();
+    if (xi > zero) {
+      return dy;
+    }
+    return (xi < zero) ? RealT(-dy) : zero;
+  };
+  internal::EntrywiseZipInto(x, gradient_wrt_output, gradient_wrt_input,
+                             sign_scaled);
+}
+
+/** Complex back prop for abs: dx = conj(x * dy / |x|), or 0 when x == 0. */
+template <typename RealT>
+void do_abs_bp(
+  El::Matrix<El::Complex<RealT>, El::Device::CPU> const& x,
+  El::Matrix<RealT, El::Device::CPU> const& gradient_wrt_output,
+  El::Matrix<El::Complex<RealT>, El::Device::CPU>& gradient_wrt_input)
+{
+  using ComplexT = El::Complex<RealT>;
+  auto const grad = [](ComplexT const& z, RealT dy) -> ComplexT {
+    if (z == ComplexT{0}) { return ComplexT{0}; }
+    return El::Conj(z * (dy / El::Abs(z)));
+  };
+  internal::EntrywiseZipInto(x, gradient_wrt_output, gradient_wrt_input,
+                             grad);
+}
+
+} // namespace
+
+/** Forward prop on CPU: output = El::Abs(input), applied entrywise. */
+template <typename DataT, El::Device D>
+void AbsOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  // The callable maps a DataT entry to an El::Base<DataT> result and is
+  // passed to El::EntrywiseMap wrapped in a std::function.
+  using AbsFunction = std::function<El::Base<DataT>(DataT const&)>;
+  auto const& x = inputs.front().data();
+  auto& y = outputs.front().data();
+  El::EntrywiseMap(x,
+                   y,
+                   AbsFunction([](DataT const& v) { return El::Abs(v); }));
+}
+
+/** Back prop on CPU. All three vectors are checked: front() is used on each. */
+template <typename DataT, El::Device D>
+void AbsOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{
+  LBANN_ASSERT(inputs.size() == 1 && gradient_wrt_outputs.size() == 1 &&
+               gradient_wrt_inputs.size() == 1);
+  do_abs_bp(inputs.front().data(),
+            gradient_wrt_outputs.front().data(),
+            gradient_wrt_inputs.front().data());
+}
+
+#define PROTO(T) template class AbsOperator<T, El::Device::CPU>
+// instantiate.hpp presumably expands PROTO per supported type -- TODO confirm.
+#define LBANN_INSTANTIATE_CPU_HALF
+#include "lbann/macros/instantiate.hpp"
+// Second pass for El::Complex<T>, with LBANN_INSTANTIATE_CPU_HALF undefined.
+#undef LBANN_INSTANTIATE_CPU_HALF
+#undef PROTO
+#define PROTO(T) template class AbsOperator<El::Complex<T>, El::Device::CPU>
+
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/operators/math/abs.cu b/src/operators/math/abs.cu
new file mode 100644
index 00000000000..b719d6ed79b
--- /dev/null
+++ b/src/operators/math/abs.cu
@@ -0,0 +1,108 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/abs.hpp"
+
+#include "lbann/base.hpp"
+#include "lbann/utils/gpu/helpers.hpp"
+
+#include "common.cuh"
+
+namespace lbann {
+
+namespace {
+
+/** Device functor for abs: unary overloads are forward, binary are backprop. */
+template <typename DataT>
+struct AbsOpImpl {
+  using ComplexT = thrust::complex<DataT>;
+  // Forward prop: |x| for real and for complex entries.
+  inline __device__ DataT operator()(DataT const& x) const {
+    return gpu_lib::abs(x);
+  }
+  inline __device__ DataT operator()(ComplexT const& x) const {
+    return thrust::abs(x);
+  }
+  // Back prop, real case: dy, -dy or 0 according to the sign of x.
+  inline __device__ DataT operator()(DataT const& x, DataT const& dy) const {
+    DataT const zero = (DataT) 0.;
+    if (x > zero) { return dy; }
+    return (x < zero) ? -dy : zero;
+  }
+  // Back prop, complex case: conj(x * dy / |x|), or 0 when x == 0.
+  inline __device__ ComplexT operator()(ComplexT const& x,
+                                        DataT const& dy) const {
+    ComplexT const zero(0.f);
+    return (x == zero) ? zero : thrust::conj(x * (dy / thrust::abs(x)));
+  }
+};// struct AbsOpImpl
+
+} // namespace
+
+/** Forward prop on GPU: map AbsOpImpl entrywise over the single input. */
+template <typename DataT, El::Device Device>
+void AbsOperator<DataT, Device>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  using AbsFunctor = AbsOpImpl<El::Base<DataT>>;
+  auto const& x = inputs.front().data();
+  auto& y = outputs.front().data();
+  El::EntrywiseMap(x, y, AbsFunctor{});
+}
+
+/** Back prop on GPU: zip input and output gradient through AbsOpImpl. */
+template <typename DataT, El::Device Device>
+void AbsOperator<DataT, Device>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,
+  std::vector<LocalInputTensorType> grads_wrt_inputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(grads_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(grads_wrt_inputs.size() == 1);
+
+  using AbsFunctor = AbsOpImpl<El::Base<DataT>>;
+  auto const& x = inputs.front().data();
+  auto const& dy = grads_wrt_outputs.front().data();
+  auto& dx = grads_wrt_inputs.front().data();
+  internal::EntrywiseZipInto(x, dy, dx, AbsFunctor{});
+}
+
+#define PROTO(T) template class AbsOperator<T, El::Device::GPU>
+// instantiate.hpp presumably expands PROTO per supported type -- TODO confirm.
+#define LBANN_INSTANTIATE_GPU_HALF
+#include "lbann/macros/instantiate.hpp"
+// Second pass for El::Complex<T>, with LBANN_INSTANTIATE_GPU_HALF undefined.
+#undef LBANN_INSTANTIATE_GPU_HALF
+#undef PROTO
+#define PROTO(T) template class AbsOperator<El::Complex<T>, El::Device::GPU>
+
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/operators/math/binary.cpp b/src/operators/math/binary.cpp
new file mode 100644
index 00000000000..ce2b27ea131
--- /dev/null
+++ b/src/operators/math/binary.cpp
@@ -0,0 +1,543 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary.hpp"
+
+#include "common.hpp"
+
+namespace lbann {
+
+namespace {
+
+// =========================================================
+// Functors for the entry-wise binary operators
+// =========================================================
+// Note: Binary operator corresponds to forward prop step
+// (\f$ y = f(x_1,x_2) \f$) and 5-ary operator corresponds
+// to back prop step
+// (\f$ \frac{dL}{dx_i} = \frac{dL}{dy} \frac{df}{dx_i}(x_1,x_2) \f$).
+
+/** Add operator. */
+template <typename DataT>
+struct AddOpImpl
+{
+  /** Forward prop: y = x1 + x2. */
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  { return x1 + x2; }
+  /** Back prop: dy is passed unchanged to both input gradients. */
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& dy,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dy;
+    dx2 = dy;
+  }
+};
+
+/** Subtract operator. */
+template <typename DataT>
+struct SubtractOpImpl
+{
+  /** Forward prop: y = x1 - x2. */
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  { return x1 - x2; }
+  /** Back prop: dx1 = dy and dx2 = -dy. */
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& dy,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dy;
+    dx2 = -dy;
+  }
+};
+
+/** Multiply operator. */
+template <typename DataT>
+struct MultiplyOpImpl
+{
+  /** Forward prop: y = x1 * x2. */
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  { return x1 * x2; }
+  /** Back prop (product rule): dx1 = dy * x2 and dx2 = dy * x1. */
+  void operator()(DataT const& x1,
+                  DataT const& x2,
+                  DataT const& dy,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dy * x2;
+    dx2 = dy * x1;
+  }
+};
+
+/** Divide operator. */
+template <typename DataT>
+struct DivideOpImpl
+{
+  /** Forward prop: y = x1 / x2. */
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  { return x1 / x2; }
+  /** Back prop (quotient rule): dx1 = dy / x2, dx2 = -dy * x1 / x2^2. */
+  void operator()(DataT const& x1,
+                  DataT const& x2,
+                  DataT const& dy,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dy / x2;
+    dx2 = -dy * x1 / (x2 * x2);
+  }
+};
+
+/** Modulo operator: y = fmod(x1, x2), i.e. x1 - trunc(x1 / x2) * x2. */
+template <typename DataT>
+struct ModOpImpl
+{
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    using std::fmod;
+    return fmod(x1, x2);
+  }
+  void operator()(DataT const& x1,
+                  DataT const& x2,
+                  DataT const& dy,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dy;
+    dx2 = -dy * std::trunc(x1 / x2); // fmod truncates toward zero, not floor
+  }
+};
+
+/** Power operator: y = x1 raised to the power x2. */
+template <typename DataT>
+struct PowOpImpl
+{
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    return El::Pow(x1, x2);
+  }
+  void operator()(DataT const& x1,
+                  DataT const& x2,
+                  DataT const& dy,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    // dx1 = dy * x2 * x1^(x2 - 1) and dx2 = dy * log(x1) * x1^x2.
+    dx1 = dy * x2 * std::pow(x1, x2 - El::TypeTraits<DataT>::One());
+    dx2 = dy * std::log(x1) * std::pow(x1, x2);
+  }
+};
+
+/** Safe divide operator.
+ *  Computes x1 / x2, but outputs zero (and zero gradients) whenever the
+ *  quotient is not finite, i.e. is an infinity or NaN.
+ */
+template <typename DataT>
+struct SafeDivideOpImpl
+{
+  /** Forward prop. */
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    auto const quotient = x1 / x2;
+    // Non-finite quotients are replaced by zero.
+    if (!std::isfinite(quotient)) {
+      return El::TypeTraits<DataT>::Zero();
+    }
+    return quotient;
+  }
+  /** Back prop: quotient-rule gradients, or zeros for a non-finite quotient. */
+  void operator()(DataT const& x1,
+                  DataT const& x2,
+                  DataT const& dy,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    if (!std::isfinite(x1 / x2)) {
+      // Matches the forward pass, which output zero for this entry.
+      dx1 = El::TypeTraits<DataT>::Zero();
+      dx2 = El::TypeTraits<DataT>::Zero();
+      return;
+    }
+    dx1 = dy / x2;
+    dx2 = -dy * x1 / (x2 * x2);
+  }
+};
+
+/** Squared difference operator: y = (x1 - x2)^2. */
+template <typename DataT>
+struct SquaredDifferenceOpImpl
+{
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    return (x1 - x2) * (x1 - x2);
+  }
+  // Back prop: dx1 = 2 * dy * (x1 - x2) and dx2 = 2 * dy * (x2 - x1).
+  void operator()(DataT const& x1,
+                  DataT const& x2,
+                  DataT const& dy,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dy * 2 * (x1 - x2);
+    dx2 = dy * 2 * (x2 - x1);
+  }
+};
+
+/** Maximum operator: y = max(x1, x2). */
+template <typename DataT>
+struct MaxOpImpl
+{
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    return std::max(x1, x2);
+  }
+  void operator()(DataT const& x1,
+                  DataT const& x2,
+                  DataT const& dy,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    auto const zero = El::TypeTraits<DataT>::Zero();
+    if (x1 > x2) {
+      dx1 = dy;
+      dx2 = zero;
+    } else if (x2 > x1) {
+      dx1 = zero;
+      dx2 = dy;
+    } else {
+      // Neither input is strictly larger: split the gradient evenly.
+      dx1 = dy / 2;
+      dx2 = dy / 2;
+    }
+  }
+};
+
+/** Minimum operator: y = min(x1, x2). */
+template <typename DataT>
+struct MinOpImpl
+{
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    return std::min(x1, x2);
+  }
+  void operator()(DataT const& x1,
+                  DataT const& x2,
+                  DataT const& dy,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    auto const zero = El::TypeTraits<DataT>::Zero();
+    if (x1 < x2) {
+      dx1 = dy;
+      dx2 = zero;
+    } else if (x2 < x1) {
+      dx1 = zero;
+      dx2 = dy;
+    } else {
+      // Neither input is strictly smaller: split the gradient evenly.
+      dx1 = dy / 2;
+      dx2 = dy / 2;
+    }
+  }
+};
+
+/** Equal operator: y = 1 if x1 == x2 and 0 otherwise. */
+template <typename DataT>
+struct EqualOpImpl
+{
+  using Traits = El::TypeTraits<DataT>;
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    return (x1 == x2) ? Traits::One() : Traits::Zero();
+  }
+  // Back prop: both input gradients are set to zero.
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& /*dy*/,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = Traits::Zero();
+  }
+};
+
+/** Not equal operator: y = 0 if x1 == x2 and 1 otherwise. */
+template <typename DataT>
+struct NotEqualOpImpl
+{
+  using Traits = El::TypeTraits<DataT>;
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    return (x1 == x2) ? Traits::Zero() : Traits::One();
+  }
+  // Back prop: both input gradients are set to zero.
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& /*dy*/,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = Traits::Zero();
+  }
+};
+
+/** Less than operator: y = 1 if x1 < x2 and 0 otherwise. */
+template <typename DataT>
+struct LessOpImpl
+{
+  using Traits = El::TypeTraits<DataT>;
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    return (x1 < x2) ? Traits::One() : Traits::Zero();
+  }
+  // Back prop: both input gradients are set to zero.
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& /*dy*/,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = Traits::Zero();
+  }
+};
+
+/** Less than or equal operator: y = 1 if x1 <= x2 and 0 otherwise. */
+template <typename DataT>
+struct LessEqualOpImpl
+{
+  using Traits = El::TypeTraits<DataT>;
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    return (x1 <= x2) ? Traits::One() : Traits::Zero();
+  }
+  // Back prop: both input gradients are set to zero.
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& /*dy*/,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = Traits::Zero();
+  }
+};
+
+/** Greater than operator: y = 1 if x1 > x2 and 0 otherwise. */
+template <typename DataT>
+struct GreaterOpImpl
+{
+  using Traits = El::TypeTraits<DataT>;
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    return (x1 > x2) ? Traits::One() : Traits::Zero();
+  }
+  // Back prop: both input gradients are set to zero.
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& /*dy*/,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = Traits::Zero();
+  }
+};
+
+/** Greater than or equal operator: y = 1 if x1 >= x2 and 0 otherwise. */
+template <typename DataT>
+struct GreaterEqualOpImpl
+{
+  using Traits = El::TypeTraits<DataT>;
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    return (x1 >= x2) ? Traits::One() : Traits::Zero();
+  }
+  // Back prop: both input gradients are set to zero.
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& /*dy*/,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = Traits::Zero();
+  }
+};
+
+/** Logical and operator: an input is true when it is nonzero and not NaN. */
+template <typename DataT>
+struct LogicalAndOpImpl
+{
+  using Traits = El::TypeTraits<DataT>;
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    bool const lhs = !std::isnan(x1) && x1 != Traits::Zero();
+    bool const rhs = !std::isnan(x2) && x2 != Traits::Zero();
+    return (lhs && rhs) ? Traits::One() : Traits::Zero();
+  }
+  // Back prop: both input gradients are set to zero.
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& /*dy*/,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = Traits::Zero();
+  }
+};
+
+/** Logical or operator: an input is true when it is nonzero and not NaN. */
+template <typename DataT>
+struct LogicalOrOpImpl
+{
+  using Traits = El::TypeTraits<DataT>;
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    bool const lhs = !std::isnan(x1) && x1 != Traits::Zero();
+    bool const rhs = !std::isnan(x2) && x2 != Traits::Zero();
+    return (lhs || rhs) ? Traits::One() : Traits::Zero();
+  }
+  // Back prop: both input gradients are set to zero.
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& /*dy*/,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = Traits::Zero();
+  }
+};
+
+/** Logical xor operator: an input is true when it is nonzero and not NaN. */
+template <typename DataT>
+struct LogicalXorOpImpl
+{
+  using Traits = El::TypeTraits<DataT>;
+  DataT operator()(DataT const& x1, DataT const& x2) const noexcept
+  {
+    bool const lhs = !std::isnan(x1) && x1 != Traits::Zero();
+    bool const rhs = !std::isnan(x2) && x2 != Traits::Zero();
+    return (lhs != rhs) ? Traits::One() : Traits::Zero();
+  }
+  // Back prop: both input gradients are set to zero.
+  void operator()(DataT const& /*x1*/,
+                  DataT const& /*x2*/,
+                  DataT const& /*dy*/,
+                  DataT& dx1,
+                  DataT& dx2) const noexcept
+  {
+    dx1 = dx2 = Traits::Zero();
+  }
+};
+
+} // namespace
+
+// Define OP_NAME##Operator's fp/bp_compute_local using OP_NAME##OpImpl
+#define DEFINE_COMPUTE_OPS(OP_NAME)                                            \
+  template <typename DataT, El::Device Device>                                 \
+  void OP_NAME##Operator<DataT, Device>::fp_compute_local(                     \
+    std::vector<ConstLocalInputTensorType> inputs,                             \
+    std::vector<LocalOutputTensorType> outputs) const                          \
+  {                                                                            \
+    LBANN_ASSERT_DEBUG(inputs.size() == 2);                                    \
+    LBANN_ASSERT_DEBUG(outputs.size() == 1);                                   \
+    auto const& input0 = inputs[0].data();                                     \
+    auto const& input1 = inputs[1].data();                                     \
+    auto& output = outputs.front().data();                                     \
+    internal::EntrywiseZipInto(input0,                                         \
+                               input1,                                         \
+                               output,                                         \
+                               OP_NAME##OpImpl<DataT>{});                      \
+  }                                                                            \
+  template <typename DataT, El::Device Device>                                 \
+  void OP_NAME##Operator<DataT, Device>::bp_compute_local(                     \
+    std::vector<ConstLocalInputTensorType> inputs,                             \
+    std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,                 \
+    std::vector<LocalInputTensorType> grads_wrt_inputs) const                  \
+  {                                                                            \
+    LBANN_ASSERT_DEBUG(inputs.size() == 2);                                    \
+    LBANN_ASSERT_DEBUG(grads_wrt_outputs.size() == 1);                         \
+    LBANN_ASSERT_DEBUG(grads_wrt_inputs.size() == 2);                          \
+    auto const& input0 = inputs[0].data();                                     \
+    auto const& input1 = inputs[1].data();                                     \
+    auto const& grad_wrt_output = grads_wrt_outputs.front().data();            \
+    auto& grad_wrt_input0 = grads_wrt_inputs[0].data();                        \
+    auto& grad_wrt_input1 = grads_wrt_inputs[1].data();                        \
+    internal::apply_binary_backprop_operator(input0,                           \
+                                             input1,                           \
+                                             grad_wrt_output,                  \
+                                             grad_wrt_input0,                  \
+                                             grad_wrt_input1,                  \
+                                             OP_NAME##OpImpl<DataT>{});        \
+  }
+// Define the compute functions of every entry-wise binary operator.
+DEFINE_COMPUTE_OPS(Add)
+DEFINE_COMPUTE_OPS(Divide)
+DEFINE_COMPUTE_OPS(Equal)
+DEFINE_COMPUTE_OPS(Greater)
+DEFINE_COMPUTE_OPS(GreaterEqual)
+DEFINE_COMPUTE_OPS(Less)
+DEFINE_COMPUTE_OPS(LessEqual)
+DEFINE_COMPUTE_OPS(LogicalAnd)
+DEFINE_COMPUTE_OPS(LogicalOr)
+DEFINE_COMPUTE_OPS(LogicalXor)
+DEFINE_COMPUTE_OPS(Max)
+DEFINE_COMPUTE_OPS(Min)
+DEFINE_COMPUTE_OPS(Mod)
+DEFINE_COMPUTE_OPS(Multiply)
+DEFINE_COMPUTE_OPS(NotEqual)
+DEFINE_COMPUTE_OPS(Pow)
+DEFINE_COMPUTE_OPS(SafeDivide)
+DEFINE_COMPUTE_OPS(SquaredDifference)
+DEFINE_COMPUTE_OPS(Subtract)
+// CPU explicit instantiations; instantiate.hpp presumably expands PROTO(T).
+#define PROTO(T)                                                               \
+  template class AddOperator<T, El::Device::CPU>;                              \
+  template class SubtractOperator<T, El::Device::CPU>;                         \
+  template class MultiplyOperator<T, El::Device::CPU>;                         \
+  template class DivideOperator<T, El::Device::CPU>;                           \
+  template class ModOperator<T, El::Device::CPU>;                              \
+  template class PowOperator<T, El::Device::CPU>;                              \
+  template class SafeDivideOperator<T, El::Device::CPU>;                       \
+  template class SquaredDifferenceOperator<T, El::Device::CPU>;                \
+  template class MaxOperator<T, El::Device::CPU>;                              \
+  template class MinOperator<T, El::Device::CPU>;                              \
+  template class EqualOperator<T, El::Device::CPU>;                            \
+  template class NotEqualOperator<T, El::Device::CPU>;                         \
+  template class LessOperator<T, El::Device::CPU>;                             \
+  template class LessEqualOperator<T, El::Device::CPU>;                        \
+  template class GreaterOperator<T, El::Device::CPU>;                          \
+  template class GreaterEqualOperator<T, El::Device::CPU>;                     \
+  template class LogicalAndOperator<T, El::Device::CPU>;                       \
+  template class LogicalOrOperator<T, El::Device::CPU>;                        \
+  template class LogicalXorOperator<T, El::Device::CPU>
+
+#define LBANN_INSTANTIATE_CPU_HALF
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/operators/math/binary.cu b/src/operators/math/binary.cu
new file mode 100644
index 00000000000..eff17369e7e
--- /dev/null
+++ b/src/operators/math/binary.cu
@@ -0,0 +1,510 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary.hpp"
+#include "lbann/utils/gpu/helpers.hpp"
+
+#include "common.cuh"
+
+namespace lbann {
+
+namespace {
+
+// =========================================================
+// Functor objects for entry-wise binary operators
+// =========================================================
+// Note: Binary operator corresponds to forward prop step
+// (\f$ y = f(x_1,x_2) \f$) and 5-ary operator corresponds
+// to back prop step
+// (\f$ \frac{dL}{dx_i} = \frac{dL}{dy} \frac{df}{dx_i}(x_1,x_2) \f$).
+
+/** Add functor. Forward: y = x1 + x2. Backward: dx1 = dy, dx2 = dy. */
+template <typename DataT>
+struct AddOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return x1 + x2;
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = dy;
+    dx2 = dy;
+  }
+};
+
+/** Subtract functor. Forward: y = x1 - x2. Backward: dx1 = dy, dx2 = -dy. */
+template <typename DataT>
+struct SubtractOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return x1 - x2;
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = dy;
+    dx2 = -dy;
+  }
+};
+
+/** Multiply functor. Forward: y = x1 * x2. Backward: dx1 = dy * x2, dx2 = dy * x1. */
+template <typename DataT>
+struct MultiplyOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return x1 * x2;
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = dy * x2;
+    dx2 = dy * x1;
+  }
+};
+
+/** Divide functor. Forward: y = x1 / x2. Backward: dx1 = dy / x2, dx2 = -dy * x1 / x2^2. No guard for x2 == 0 (SafeDivideOpImpl adds one). */
+template <typename DataT>
+struct DivideOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return x1 / x2;
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = dy / x2;
+    dx2 = -dy * x1 / (x2*x2);
+  }
+};
+
+/** Modulo functor. Forward: y = gpu_lib::mod(x1, x2). Backward: dx1 = dy, dx2 = -dy * floor(x1 / x2). NOTE(review): if gpu_lib::mod truncates toward zero (fmod-style), floor differs from the true factor for negative non-integer quotients -- confirm. */
+template <typename DataT>
+struct ModOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return gpu_lib::mod(x1, x2);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = dy;
+    dx2 = -dy * gpu_lib::floor(x1 / x2);
+  }
+};
+
+/** Power functor. Forward: y = pow(x1, x2). Backward: dx1 = dy * x2 * pow(x1, x2 - 1), dx2 = dy * log(x1) * pow(x1, x2). NOTE(review): log(x1) is unguarded; presumably non-finite for x1 <= 0 -- confirm callers tolerate that. */
+template <typename DataT>
+struct PowOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return gpu_lib::pow(x1, x2);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = dy * x2 * gpu_lib::pow(x1, x2 - DataT(1.0));
+    dx2 = dy * gpu_lib::log(x1) * gpu_lib::pow(x1, x2);
+  }
+};
+
+/** Safe divide functor. Forward: x1 / x2, or zero when that quotient
+ *  is not finite (per gpu_lib::isfinite). Backward: the plain divide
+ *  gradients when the quotient is finite; zero for both otherwise.
+ */
+template <typename DataT>
+struct SafeDivideOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    auto const& y = x1 / x2;
+    if (gpu_lib::isfinite(y)) { return y; }
+    else             { return DataT(0.0); }
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    auto const& y = x1 / x2;
+    if (gpu_lib::isfinite(y)) {
+      dx1 = dy / x2;
+      dx2 = -dy * x1 / (x2*x2);
+    }
+    else {
+      dx1 = DataT(0.0);
+      dx2 = DataT(0.0);
+    }
+  }
+};
+
+/** Squared difference functor. Forward: y = (x1 - x2)^2. Backward: dx1 = 2 * dy * (x1 - x2), dx2 = 2 * dy * (x2 - x1). */
+template <typename DataT>
+struct SquaredDifferenceOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    auto const& diff = x1 - x2;
+    return diff * diff;
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = dy * DataT(2.) * (x1-x2);
+    dx2 = dy * DataT(2.) * (x2-x1);
+  }
+};
+
+/** Maximum functor. Forward: y = gpu_lib::max(x1, x2). Backward: dy goes to the strictly greater input; if neither compares greater, each input gets dy / 2. */
+template <typename DataT>
+struct MaxOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return gpu_lib::max(x1, x2);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    if (x1 > x2) {
+      dx1 = dy;
+      dx2 = DataT(0.0);
+    }
+    else if (x2 > x1) {
+      dx1 = DataT(0.0);
+      dx2 = dy;
+    }
+    else {
+      dx1 = dy / DataT(2.);
+      dx2 = dy / DataT(2.);
+    }
+  }
+};
+
+/** Minimum functor. Forward: y = gpu_lib::min(x1, x2). Backward: dy goes to the strictly smaller input; if neither compares smaller, each input gets dy / 2. */
+template <typename DataT>
+struct MinOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return gpu_lib::min(x1, x2);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    if (x1 < x2) {
+      dx1 = dy;
+      dx2 = DataT(0.0);
+    }
+    else if (x2 < x1) {
+      dx1 = DataT(0.0);
+      dx2 = dy;
+    }
+    else {
+      dx1 = dy / DataT(2.);
+      dx2 = dy / DataT(2.);
+    }
+  }
+};
+
+/** Equal functor. Forward: y = 1 if x1 == x2, else 0. Backward: writes zero to both dx1 and dx2. */
+template <typename DataT>
+struct EqualOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return x1 == x2 ? DataT(1.0) : DataT(0.0);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+/** Not-equal functor. Forward: y = 0 if x1 == x2, else 1. Backward: writes zero to both dx1 and dx2. */
+template <typename DataT>
+struct NotEqualOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return x1 == x2 ? DataT(0.0) : DataT(1.0);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+/** Less-than functor. Forward: y = 1 if x1 < x2, else 0. Backward: writes zero to both dx1 and dx2. */
+template <typename DataT>
+struct LessOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return x1 < x2 ? DataT(1.0) : DataT(0.0);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+/** Less-than-or-equal functor. Forward: y = 1 if x1 <= x2, else 0. Backward: writes zero to both dx1 and dx2. */
+template <typename DataT>
+struct LessEqualOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return x1 <= x2 ? DataT(1.0) : DataT(0.0);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+/** Greater-than functor. Forward: y = 1 if x1 > x2, else 0. Backward: writes zero to both dx1 and dx2. */
+template <typename DataT>
+struct GreaterOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return x1 > x2 ? DataT(1.0) : DataT(0.0);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+/** Greater-than-or-equal functor. Forward: y = 1 if x1 >= x2, else 0. Backward: writes zero to both dx1 and dx2. */
+template <typename DataT>
+struct GreaterEqualOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    return x1 >= x2 ? DataT(1.0) : DataT(0.0);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+/** Logical-and functor. An input counts as true when it is nonzero and not NaN. Forward: y = 1 if both inputs are true, else 0. Backward: writes zero to both dx1 and dx2. */
+template <typename DataT>
+struct LogicalAndOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    auto const& b1 = x1 != DataT(0.0) && !gpu_lib::isnan(x1);
+    auto const& b2 = x2 != DataT(0.0) && !gpu_lib::isnan(x2);
+    return (b1 && b2) ? DataT(1.0) : DataT(0.0);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+/** Logical-or functor. An input counts as true when it is nonzero and not NaN. Forward: y = 1 if either input is true, else 0. Backward: writes zero to both dx1 and dx2. */
+template <typename DataT>
+struct LogicalOrOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    auto const& b1 = x1 != DataT(0.0) && !gpu_lib::isnan(x1);
+    auto const& b2 = x2 != DataT(0.0) && !gpu_lib::isnan(x2);
+    return (b1 || b2) ? DataT(1.0) : DataT(0.0);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+/** Logical-xor functor. An input counts as true when it is nonzero and not NaN. Forward: y = 1 if exactly one input is true, else 0. Backward: writes zero to both dx1 and dx2. */
+template <typename DataT>
+struct LogicalXorOpImpl {
+  inline __device__ DataT operator()(DataT const& x1, DataT const& x2) const
+  {
+    auto const& b1 = x1 != DataT(0.0) && !gpu_lib::isnan(x1);
+    auto const& b2 = x2 != DataT(0.0) && !gpu_lib::isnan(x2);
+    return (b1 || b2) && !(b1 && b2) ? DataT(1.0) : DataT(0.0);
+  }
+  inline __device__ void operator()(DataT const& x1,
+                                    DataT const& x2,
+                                    DataT const& dy,
+                                    DataT& dx1,
+                                    DataT& dx2) const
+  {
+    dx1 = DataT(0.0);
+    dx2 = DataT(0.0);
+  }
+};
+
+} // namespace
+
+  // Defines OP_NAME##Operator::fp_compute_local / bp_compute_local; both delegate to the OP_NAME##OpImpl functor (2 inputs, 1 output).
+#define DEFINE_COMPUTE_OPS(OP_NAME)                                     \
+  template <typename DataT, El::Device Device>                          \
+  void OP_NAME##Operator<DataT, Device>::fp_compute_local(              \
+    std::vector<ConstLocalInputTensorType> inputs,                      \
+    std::vector<LocalOutputTensorType> outputs) const                   \
+  {                                                                     \
+    LBANN_ASSERT_DEBUG(inputs.size() == 2);                             \
+    LBANN_ASSERT_DEBUG(outputs.size() == 1);                            \
+    auto const& input0 = inputs[0].data();                              \
+    auto const& input1 = inputs[1].data();                              \
+    auto& output = outputs.front().data();                              \
+    internal::EntrywiseZipInto(input0,                                  \
+                               input1,                                  \
+                               output,                                  \
+                               OP_NAME##OpImpl<DataT>{});               \
+  }                                                                     \
+  template <typename DataT, El::Device Device>                          \
+  void OP_NAME##Operator<DataT, Device>::bp_compute_local(              \
+    std::vector<ConstLocalInputTensorType> inputs,                      \
+    std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,          \
+    std::vector<LocalInputTensorType> grads_wrt_inputs) const           \
+  {                                                                     \
+    LBANN_ASSERT_DEBUG(inputs.size() == 2);                             \
+    LBANN_ASSERT_DEBUG(grads_wrt_outputs.size() == 1);                  \
+    LBANN_ASSERT_DEBUG(grads_wrt_inputs.size() == 2);                   \
+    auto const& input0 = inputs[0].data();                              \
+    auto const& input1 = inputs[1].data();                              \
+    auto const& grad_wrt_output = grads_wrt_outputs.front().data();     \
+    auto& grad_wrt_input0 = grads_wrt_inputs[0].data();                 \
+    auto& grad_wrt_input1 = grads_wrt_inputs[1].data();                 \
+    internal::apply_binary_backprop_operator(input0,                    \
+                                             input1,                    \
+                                             grad_wrt_output,           \
+                                             grad_wrt_input0,           \
+                                             grad_wrt_input1,           \
+                                             OP_NAME##OpImpl<DataT>{}); \
+  }
+
+DEFINE_COMPUTE_OPS(Add)
+DEFINE_COMPUTE_OPS(Divide)
+DEFINE_COMPUTE_OPS(Equal)
+DEFINE_COMPUTE_OPS(Greater)
+DEFINE_COMPUTE_OPS(GreaterEqual)
+DEFINE_COMPUTE_OPS(Less)
+DEFINE_COMPUTE_OPS(LessEqual)
+DEFINE_COMPUTE_OPS(LogicalAnd)
+DEFINE_COMPUTE_OPS(LogicalOr)
+DEFINE_COMPUTE_OPS(LogicalXor)
+DEFINE_COMPUTE_OPS(Max)
+DEFINE_COMPUTE_OPS(Min)
+DEFINE_COMPUTE_OPS(Mod)
+DEFINE_COMPUTE_OPS(Multiply)
+DEFINE_COMPUTE_OPS(NotEqual)
+DEFINE_COMPUTE_OPS(Pow)
+DEFINE_COMPUTE_OPS(SafeDivide)
+DEFINE_COMPUTE_OPS(SquaredDifference)
+DEFINE_COMPUTE_OPS(Subtract)
+
+#define PROTO(T)                                                     \
+  template class AddOperator<T, El::Device::GPU>;                    \
+  template class DivideOperator<T, El::Device::GPU>;                 \
+  template class EqualOperator<T, El::Device::GPU>;                  \
+  template class GreaterEqualOperator<T, El::Device::GPU>;           \
+  template class GreaterOperator<T, El::Device::GPU>;                \
+  template class LessEqualOperator<T, El::Device::GPU>;              \
+  template class LessOperator<T, El::Device::GPU>;                   \
+  template class LogicalAndOperator<T, El::Device::GPU>;             \
+  template class LogicalOrOperator<T, El::Device::GPU>;              \
+  template class LogicalXorOperator<T, El::Device::GPU>;             \
+  template class MaxOperator<T, El::Device::GPU>;                    \
+  template class MinOperator<T, El::Device::GPU>;                    \
+  template class ModOperator<T, El::Device::GPU>;                    \
+  template class MultiplyOperator<T, El::Device::GPU>;               \
+  template class NotEqualOperator<T, El::Device::GPU>;               \
+  template class PowOperator<T, El::Device::GPU>;                    \
+  template class SafeDivideOperator<T, El::Device::GPU>;             \
+  template class SquaredDifferenceOperator<T, El::Device::GPU>;      \
+  template class SubtractOperator<T, El::Device::GPU>
+
+#define LBANN_INSTANTIATE_GPU_HALF
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/operators/math/cereal_registration/CMakeLists.txt b/src/operators/math/cereal_registration/CMakeLists.txt
index e18123597f9..dccb614e18f 100644
--- a/src/operators/math/cereal_registration/CMakeLists.txt
+++ b/src/operators/math/cereal_registration/CMakeLists.txt
@@ -1,6 +1,55 @@
 # Add the source files for this directory
 set_full_path(THIS_DIR_SOURCES
+  abs.cpp
+  acos.cpp
+  acosh.cpp
+  add.cpp
+  asin.cpp
+  asinh.cpp
+  atan.cpp
+  atanh.cpp
+  ceil.cpp
   clamp.cpp
+  cos.cpp
+  cosh.cpp
+  divide.cpp
+  equal.cpp
+  erf.cpp
+  erfinv.cpp
+  exp.cpp
+  expm1.cpp
+  floor.cpp
+  greater.cpp
+  greater_equal.cpp
+  less.cpp
+  less_equal.cpp
+  log.cpp
+  log1p.cpp
+  logical_and.cpp
+  logical_not.cpp
+  logical_or.cpp
+  logical_xor.cpp
+  max.cpp
+  min.cpp
+  mod.cpp
+  multiply.cpp
+  negative.cpp
+  not_equal.cpp
+  pow.cpp
+  reciprocal.cpp
+  round.cpp
+  rsqrt.cpp
+  safe_divide.cpp
+  safe_reciprocal.cpp
+  sign.cpp
+  sin.cpp
+  sinh.cpp
+  sqrt.cpp
+  square.cpp
+  squared_difference.cpp
+  subtract.cpp
+  tan.cpp
+  tanh.cpp
   )
 
 # Propagate the files up the tree
diff --git a/src/operators/math/cereal_registration/abs.cpp b/src/operators/math/cereal_registration/abs.cpp
new file mode 100644
index 00000000000..1084fc1a54c
--- /dev/null
+++ b/src/operators/math/cereal_registration/abs.cpp
@@ -0,0 +1,59 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/abs.hpp"
+
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME AbsOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
+
+// Hack for now: after the standard registration include above, redefine the macros and register the El::Complex instantiations by hand.
+#include <cereal/types/polymorphic.hpp>
+#include <lbann/macros/common_cereal_registration.hpp>
+
+#undef LBANN_COMMA
+#undef LBANN_REGISTER_OPERATOR_WITH_CEREAL_BASE
+#undef LBANN_REGISTER_OPERATOR_WITH_CEREAL
+#undef PROTO_DEVICE
+#undef PROTO
+
+#define LBANN_COMMA ,
+#define LBANN_REGISTER_OPERATOR_WITH_CEREAL(TYPE, DEVICE)                      \
+  LBANN_ADD_ALL_SERIALIZE_ETI(::lbann::AbsOperator<TYPE LBANN_COMMA DEVICE>);  \
+  CEREAL_REGISTER_TYPE_WITH_NAME(                                              \
+    ::lbann::AbsOperator<TYPE LBANN_COMMA DEVICE>,                             \
+    "AbsOperator(" #TYPE "," #DEVICE ")")
+
+#define PROTO_DEVICE(T, D) LBANN_REGISTER_OPERATOR_WITH_CEREAL(T, D)
+
+PROTO_DEVICE(El::Complex<float>, El::Device::CPU);
+PROTO_DEVICE(El::Complex<double>, El::Device::CPU);
+
+#ifdef LBANN_HAS_GPU
+PROTO_DEVICE(El::Complex<float>, El::Device::GPU);
+PROTO_DEVICE(El::Complex<double>, El::Device::GPU);
+#endif // LBANN_HAS_GPU
diff --git a/src/operators/math/cereal_registration/acos.cpp b/src/operators/math/cereal_registration/acos.cpp
new file mode 100644
index 00000000000..8dc8439a1d3
--- /dev/null
+++ b/src/operators/math/cereal_registration/acos.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME AcosOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/acosh.cpp b/src/operators/math/cereal_registration/acosh.cpp
new file mode 100644
index 00000000000..6582de9e04a
--- /dev/null
+++ b/src/operators/math/cereal_registration/acosh.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME AcoshOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/add.cpp b/src/operators/math/cereal_registration/add.cpp
new file mode 100644
index 00000000000..c7b5becbb45
--- /dev/null
+++ b/src/operators/math/cereal_registration/add.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME AddOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/asin.cpp b/src/operators/math/cereal_registration/asin.cpp
new file mode 100644
index 00000000000..1c4f824851d
--- /dev/null
+++ b/src/operators/math/cereal_registration/asin.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME AsinOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/asinh.cpp b/src/operators/math/cereal_registration/asinh.cpp
new file mode 100644
index 00000000000..4dc55456f82
--- /dev/null
+++ b/src/operators/math/cereal_registration/asinh.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME AsinhOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/atan.cpp b/src/operators/math/cereal_registration/atan.cpp
new file mode 100644
index 00000000000..0ded9ab0834
--- /dev/null
+++ b/src/operators/math/cereal_registration/atan.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME AtanOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/atanh.cpp b/src/operators/math/cereal_registration/atanh.cpp
new file mode 100644
index 00000000000..76a5c898302
--- /dev/null
+++ b/src/operators/math/cereal_registration/atanh.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME AtanhOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/ceil.cpp b/src/operators/math/cereal_registration/ceil.cpp
new file mode 100644
index 00000000000..a29d554d906
--- /dev/null
+++ b/src/operators/math/cereal_registration/ceil.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME CeilOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/clamp.cpp b/src/operators/math/cereal_registration/clamp.cpp
index 5cbb2af77f0..55d0404be84 100644
--- a/src/operators/math/cereal_registration/clamp.cpp
+++ b/src/operators/math/cereal_registration/clamp.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/src/layers/math/cereal_registration/cos.cpp b/src/operators/math/cereal_registration/cos.cpp
similarity index 66%
rename from src/layers/math/cereal_registration/cos.cpp
rename to src/operators/math/cereal_registration/cos.cpp
index 06f59e6a565..b9560362554 100644
--- a/src/layers/math/cereal_registration/cos.cpp
+++ b/src/operators/math/cereal_registration/cos.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,9 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-cos_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
 
-#define LBANN_LAYER_NAME cos_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
+#define LBANN_OPERATOR_NAME CosOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/cosh.cpp b/src/operators/math/cereal_registration/cosh.cpp
new file mode 100644
index 00000000000..1cff6ce1cd1
--- /dev/null
+++ b/src/operators/math/cereal_registration/cosh.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME CoshOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/divide.cpp b/src/operators/math/cereal_registration/divide.cpp
new file mode 100644
index 00000000000..d15a375261f
--- /dev/null
+++ b/src/operators/math/cereal_registration/divide.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME DivideOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/equal.cpp b/src/operators/math/cereal_registration/equal.cpp
new file mode 100644
index 00000000000..79930d6aa48
--- /dev/null
+++ b/src/operators/math/cereal_registration/equal.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME EqualOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/erf.cpp b/src/operators/math/cereal_registration/erf.cpp
similarity index 66%
rename from src/layers/math/cereal_registration/erf.cpp
rename to src/operators/math/cereal_registration/erf.cpp
index 139d9cc90bb..cbb29f476ae 100644
--- a/src/layers/math/cereal_registration/erf.cpp
+++ b/src/operators/math/cereal_registration/erf.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,9 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-erf_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
 
-#define LBANN_LAYER_NAME erf_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
+#define LBANN_OPERATOR_NAME ErfOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/erfinv.cpp b/src/operators/math/cereal_registration/erfinv.cpp
new file mode 100644
index 00000000000..3020f5a3b35
--- /dev/null
+++ b/src/operators/math/cereal_registration/erfinv.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME ErfInvOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/exp.cpp b/src/operators/math/cereal_registration/exp.cpp
similarity index 66%
rename from src/layers/math/cereal_registration/exp.cpp
rename to src/operators/math/cereal_registration/exp.cpp
index efd45e92180..cc253c1e2d3 100644
--- a/src/layers/math/cereal_registration/exp.cpp
+++ b/src/operators/math/cereal_registration/exp.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,9 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-exp_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
 
-#define LBANN_LAYER_NAME exp_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
+#define LBANN_OPERATOR_NAME ExpOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/expm1.cpp b/src/operators/math/cereal_registration/expm1.cpp
new file mode 100644
index 00000000000..caa170b0304
--- /dev/null
+++ b/src/operators/math/cereal_registration/expm1.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME Expm1Operator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/floor.cpp b/src/operators/math/cereal_registration/floor.cpp
new file mode 100644
index 00000000000..31690ee433c
--- /dev/null
+++ b/src/operators/math/cereal_registration/floor.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME FloorOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/greater.cpp b/src/operators/math/cereal_registration/greater.cpp
new file mode 100644
index 00000000000..7ae3cc74970
--- /dev/null
+++ b/src/operators/math/cereal_registration/greater.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME GreaterOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/greater_equal.cpp b/src/operators/math/cereal_registration/greater_equal.cpp
new file mode 100644
index 00000000000..2b7ce2c95b6
--- /dev/null
+++ b/src/operators/math/cereal_registration/greater_equal.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME GreaterEqualOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/less.cpp b/src/operators/math/cereal_registration/less.cpp
new file mode 100644
index 00000000000..fdb69769e7f
--- /dev/null
+++ b/src/operators/math/cereal_registration/less.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME LessOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/less_equal.cpp b/src/operators/math/cereal_registration/less_equal.cpp
new file mode 100644
index 00000000000..eb500ca4f1f
--- /dev/null
+++ b/src/operators/math/cereal_registration/less_equal.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME LessEqualOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/log.cpp b/src/operators/math/cereal_registration/log.cpp
similarity index 66%
rename from src/layers/math/cereal_registration/log.cpp
rename to src/operators/math/cereal_registration/log.cpp
index 2a753750cc6..9edad505b94 100644
--- a/src/layers/math/cereal_registration/log.cpp
+++ b/src/operators/math/cereal_registration/log.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,9 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-log_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
 
-#define LBANN_LAYER_NAME log_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
+#define LBANN_OPERATOR_NAME LogOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/log1p.cpp b/src/operators/math/cereal_registration/log1p.cpp
new file mode 100644
index 00000000000..e0efef3b3ad
--- /dev/null
+++ b/src/operators/math/cereal_registration/log1p.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME Log1pOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/logical_and.cpp b/src/operators/math/cereal_registration/logical_and.cpp
new file mode 100644
index 00000000000..a1af23910db
--- /dev/null
+++ b/src/operators/math/cereal_registration/logical_and.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME LogicalAndOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/logical_not.cpp b/src/operators/math/cereal_registration/logical_not.cpp
new file mode 100644
index 00000000000..70095f36295
--- /dev/null
+++ b/src/operators/math/cereal_registration/logical_not.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME LogicalNotOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/logical_or.cpp b/src/operators/math/cereal_registration/logical_or.cpp
new file mode 100644
index 00000000000..d40fc01b2e1
--- /dev/null
+++ b/src/operators/math/cereal_registration/logical_or.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME LogicalOrOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/logical_xor.cpp b/src/operators/math/cereal_registration/logical_xor.cpp
new file mode 100644
index 00000000000..ac1ffb830fa
--- /dev/null
+++ b/src/operators/math/cereal_registration/logical_xor.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME LogicalXorOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/max.cpp b/src/operators/math/cereal_registration/max.cpp
new file mode 100644
index 00000000000..235824b4973
--- /dev/null
+++ b/src/operators/math/cereal_registration/max.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME MaxOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/min.cpp b/src/operators/math/cereal_registration/min.cpp
new file mode 100644
index 00000000000..01b70f94bd1
--- /dev/null
+++ b/src/operators/math/cereal_registration/min.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME MinOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/mod.cpp b/src/operators/math/cereal_registration/mod.cpp
new file mode 100644
index 00000000000..d69db6a8bd8
--- /dev/null
+++ b/src/operators/math/cereal_registration/mod.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME ModOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/multiply.cpp b/src/operators/math/cereal_registration/multiply.cpp
new file mode 100644
index 00000000000..cdbf5b5951f
--- /dev/null
+++ b/src/operators/math/cereal_registration/multiply.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME MultiplyOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/negative.cpp b/src/operators/math/cereal_registration/negative.cpp
new file mode 100644
index 00000000000..45c56c43c24
--- /dev/null
+++ b/src/operators/math/cereal_registration/negative.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME NegativeOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/not_equal.cpp b/src/operators/math/cereal_registration/not_equal.cpp
new file mode 100644
index 00000000000..f76b2a833ef
--- /dev/null
+++ b/src/operators/math/cereal_registration/not_equal.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME NotEqualOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/pow.cpp b/src/operators/math/cereal_registration/pow.cpp
new file mode 100644
index 00000000000..ee25d574336
--- /dev/null
+++ b/src/operators/math/cereal_registration/pow.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME PowOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/reciprocal.cpp b/src/operators/math/cereal_registration/reciprocal.cpp
new file mode 100644
index 00000000000..30ec2699741
--- /dev/null
+++ b/src/operators/math/cereal_registration/reciprocal.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME ReciprocalOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/round.cpp b/src/operators/math/cereal_registration/round.cpp
new file mode 100644
index 00000000000..9816258b94d
--- /dev/null
+++ b/src/operators/math/cereal_registration/round.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME RoundOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/rsqrt.cpp b/src/operators/math/cereal_registration/rsqrt.cpp
new file mode 100644
index 00000000000..c8088ba08c9
--- /dev/null
+++ b/src/operators/math/cereal_registration/rsqrt.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME RsqrtOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/safe_divide.cpp b/src/operators/math/cereal_registration/safe_divide.cpp
new file mode 100644
index 00000000000..6fb6cd0d21a
--- /dev/null
+++ b/src/operators/math/cereal_registration/safe_divide.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME SafeDivideOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/safe_reciprocal.cpp b/src/operators/math/cereal_registration/safe_reciprocal.cpp
new file mode 100644
index 00000000000..1e4f037897b
--- /dev/null
+++ b/src/operators/math/cereal_registration/safe_reciprocal.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME SafeReciprocalOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/sign.cpp b/src/operators/math/cereal_registration/sign.cpp
new file mode 100644
index 00000000000..8eae39c15d6
--- /dev/null
+++ b/src/operators/math/cereal_registration/sign.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME SignOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/sin.cpp b/src/operators/math/cereal_registration/sin.cpp
similarity index 66%
rename from src/layers/math/cereal_registration/sin.cpp
rename to src/operators/math/cereal_registration/sin.cpp
index 9c1c8e2f34b..05b34234164 100644
--- a/src/layers/math/cereal_registration/sin.cpp
+++ b/src/operators/math/cereal_registration/sin.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,9 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-sin_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
 
-#define LBANN_LAYER_NAME sin_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
+#define LBANN_OPERATOR_NAME SinOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/sinh.cpp b/src/operators/math/cereal_registration/sinh.cpp
new file mode 100644
index 00000000000..1364db5ef54
--- /dev/null
+++ b/src/operators/math/cereal_registration/sinh.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME SinhOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/sqrt.cpp b/src/operators/math/cereal_registration/sqrt.cpp
new file mode 100644
index 00000000000..5279d73be7d
--- /dev/null
+++ b/src/operators/math/cereal_registration/sqrt.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME SqrtOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/square.cpp b/src/operators/math/cereal_registration/square.cpp
new file mode 100644
index 00000000000..eae089f8f98
--- /dev/null
+++ b/src/operators/math/cereal_registration/square.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME SquareOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/squared_difference.cpp b/src/operators/math/cereal_registration/squared_difference.cpp
new file mode 100644
index 00000000000..c0aa701315e
--- /dev/null
+++ b/src/operators/math/cereal_registration/squared_difference.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME SquaredDifferenceOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/subtract.cpp b/src/operators/math/cereal_registration/subtract.cpp
new file mode 100644
index 00000000000..d04dd4d36a0
--- /dev/null
+++ b/src/operators/math/cereal_registration/subtract.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/binary.hpp"
+
+#define LBANN_OPERATOR_NAME SubtractOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/layers/math/cereal_registration/tan.cpp b/src/operators/math/cereal_registration/tan.cpp
similarity index 66%
rename from src/layers/math/cereal_registration/tan.cpp
rename to src/operators/math/cereal_registration/tan.cpp
index 9bd95bbf67a..9b5d79b14ef 100644
--- a/src/layers/math/cereal_registration/tan.cpp
+++ b/src/operators/math/cereal_registration/tan.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,22 +23,9 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
-#include "lbann/utils/serialize.hpp"
-#include <lbann/layers/math/unary.hpp>
-
-namespace lbann {
 
-template <typename TensorDataType, data_layout Layout, El::Device Device>
-template <typename ArchiveT>
-void
-tan_layer<TensorDataType,Layout,Device>
-::serialize(ArchiveT& ar) {
-  using DataTypeLayer = data_type_layer<TensorDataType>;
-  ar(::cereal::make_nvp("DataTypeLayer",
-                        ::cereal::base_class<DataTypeLayer>(this)));
-}
-
-} // namespace lbann
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
 
-#define LBANN_LAYER_NAME tan_layer
-#include <lbann/macros/register_layer_with_cereal.hpp>
+#define LBANN_OPERATOR_NAME TanOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/tanh.cpp b/src/operators/math/cereal_registration/tanh.cpp
new file mode 100644
index 00000000000..e292bd70854
--- /dev/null
+++ b/src/operators/math/cereal_registration/tanh.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/utils/serialize.hpp"
+#include "lbann/operators/math/unary.hpp"
+
+#define LBANN_OPERATOR_NAME TanhOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/clamp.cpp b/src/operators/math/clamp.cpp
index 48272164a47..38ea746cb0a 100644
--- a/src/operators/math/clamp.cpp
+++ b/src/operators/math/clamp.cpp
@@ -24,64 +24,25 @@
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
 
-#define LBANN_CLAMP_OPERATOR_INSTANTIATE
 #include "lbann/operators/math/clamp.hpp"
+#include "lbann/utils/exception.hpp"
 
-namespace lbann {
-
-namespace {
-
-/** Local forward prop computation. */
-template <typename DataT>
-void local_fp(DataT min,
-              DataT max,
-              El::Matrix<DataT, El::Device::CPU> const& input,
-              El::Matrix<DataT, El::Device::CPU>& output)
-{
-  const auto& height = input.Height();
-  const auto& width = input.Width();
-  LBANN_OMP_PARALLEL_FOR_COLLAPSE2
-  for (El::Int col = 0; col < width; ++col) {
-    for (El::Int row = 0; row < height; ++row) {
-      const auto& x = input(row, col);
-      output(row, col) = std::max(min, std::min(max, x));
-    }
-  }
-}
+#include "common.hpp"
 
-/** Local backprop computation. */
-template <typename DataT>
-void local_bp(DataT min,
-              DataT max,
-              El::Matrix<DataT, El::Device::CPU> const& input,
-              El::Matrix<DataT, El::Device::CPU> const& gradient_wrt_output,
-              El::Matrix<DataT, El::Device::CPU>& gradient_wrt_input)
-{
-  const auto& height = input.Height();
-  const auto& width = input.Width();
-  LBANN_OMP_PARALLEL_FOR_COLLAPSE2
-  for (El::Int col = 0; col < width; ++col) {
-    for (El::Int row = 0; row < height; ++row) {
-      const auto& x = input(row, col);
-      const auto& dy = gradient_wrt_output(row, col);
-      auto& dx = gradient_wrt_input(row, col);
-      dx = (x <= min || x >= max) ? El::TypeTraits<DataT>::Zero() : dy;
-    }
-  }
-}
-
-} // namespace
+namespace lbann {
 
 template <typename DataT, El::Device D>
 void ClampOperator<DataT, D>::fp_compute_local(
   std::vector<ConstLocalInputTensorType> inputs,
   std::vector<LocalOutputTensorType> outputs) const
 {
-  LBANN_ASSERT(inputs.size() == 1 && outputs.size() == 1);
-  local_fp(this->m_min,
-           this->m_max,
-           inputs.front().data(),
-           outputs.front().data());
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  El::EntrywiseMap(inputs.front().data(),
+                   outputs.front().data(),
+                   std::function<DataT(DataT const&)>([this](DataT const& x) {
+                     return std::max(m_min, std::min(m_max, x));
+                   }));
 }
 
 template <typename DataT, El::Device D>
@@ -90,14 +51,17 @@ void ClampOperator<DataT, D>::bp_compute_local(
   std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
   std::vector<LocalInputTensorType> gradient_wrt_inputs) const
 {
-  LBANN_ASSERT(inputs.size() == 1 && gradient_wrt_outputs.size() == 1 &&
-               gradient_wrt_inputs.size() == 1);
-
-  local_bp(this->m_min,
-           this->m_max,
-           inputs.front().data(),
-           gradient_wrt_outputs.front().data(),
-           gradient_wrt_inputs.front().data());
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  internal::EntrywiseZipInto(inputs.front().data(),
+                             gradient_wrt_outputs.front().data(),
+                             gradient_wrt_inputs.front().data(),
+                             [this](auto const& x, auto const& dy) {
+                               return (x <= m_min || x >= m_max)
+                                        ? El::TypeTraits<DataT>::Zero()
+                                        : dy;
+                             });
 }
 
 #define PROTO(T) template class ClampOperator<T, El::Device::CPU>
diff --git a/src/operators/math/common.cuh b/src/operators/math/common.cuh
new file mode 100644
index 00000000000..eb6d476ef2d
--- /dev/null
+++ b/src/operators/math/common.cuh
@@ -0,0 +1,240 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+#ifndef LBANN_SRC_OPERATORS_MATH_COMMON_CUH_INCLUDED
+#define LBANN_SRC_OPERATORS_MATH_COMMON_CUH_INCLUDED
+
+#if defined __CUDACC__ || defined __HIPCC__
+
+#include "lbann/base.hpp"
+
+namespace lbann {
+namespace internal {
+namespace kernel {
+
+/** @brief Apply a binary functor entrywise to 2-D column-major buffers.
+ *
+ *  This can be applied to a row-major matrix by logically transposing
+ *  the matrix.
+ *
+ *  @tparam TILE_DIM The number of rows/columns being processed by a
+ *                   thread block.
+ *  @tparam BLK_COLS The number of columns handled at one time in the
+ *                   block.
+ *
+ *  @tparam S (Inferred) Type of first source buffer.
+ *  @tparam T (Inferred) Type of second source buffer.
+ *  @tparam U (Inferred) Type of target buffer.
+ *  @tparam SizeT (Inferred) Type of integer used to express sizes.
+ *  @tparam FunctorT (Inferred) Type of functor. Must be equivalent to
+ *                              `U(S const&, T const&)`.
+ *
+ *  @param m The number of rows in A/B/C.
+ *  @param n The number of columns in A/B/C. Columns must be contiguous
+ *           in memory.
+ *  @param A The first source matrix buffer.
+ *  @param lda The stride between columns of A in terms of elements of
+ *             type S.
+ *  @param B The second source matrix buffer.
+ *  @param ldb The stride between columns of B in terms of elements of
+ *             type T.
+ *  @param C The target matrix buffer.
+ *  @param ldc The stride between columns of C in terms of elements of
+ *             type U.
+ *  @param func The functor to apply. Must be device-invocable.
+ */
+template <int TILE_DIM,
+          int BLK_COLS,
+          typename S,
+          typename T,
+          typename U,
+          typename SizeT,
+          typename FunctorT>
+__global__ void entrywise_zip_into_kernel_naive(
+    SizeT m, SizeT n,
+    S const* const __restrict__  A, SizeT lda,
+    T const* const __restrict__  B, SizeT ldb,
+    U * const __restrict__ C, SizeT ldc,
+    FunctorT func)
+{
+    size_t const row_idx = blockIdx.x * TILE_DIM + threadIdx.x;
+    size_t const col_idx = blockIdx.y * TILE_DIM + threadIdx.y;
+
+    if (row_idx < m)  // threads mapped past the last row do nothing
+    {  // visit columns col_idx, col_idx+BLK_COLS, ... inside the tile and < n
+        for (int ii = 0; ii < TILE_DIM && col_idx + ii < n; ii += BLK_COLS)
+            C[row_idx + (col_idx+ii)*ldc] =
+                func(A[row_idx + (col_idx+ii)*lda],
+                     B[row_idx + (col_idx+ii)*ldb]);
+    }
+}
+
+    /** CUDA kernel to apply a binary backprop operator to every entry. */
+template <typename DataT, typename F>
+__global__
+void binary_backprop_operator_kernel(El::Int height, El::Int width,
+                                     DataT const* const __restrict__ x1,
+                                     El::Int x1_ldim,
+                                     DataT const* const __restrict__ x2,
+                                     El::Int x2_ldim,
+                                     DataT const* const __restrict__ dy,
+                                     El::Int dy_ldim,
+                                     DataT* const __restrict__ dx1,
+                                     El::Int dx1_ldim,
+                                     DataT* const __restrict__ dx2,
+                                     El::Int dx2_ldim,
+                                     F func)
+{
+  El::Int const gid = threadIdx.x + blockIdx.x * blockDim.x;
+  El::Int const size = height * width;
+  El::Int const num_threads = blockDim.x * gridDim.x;
+  for (El::Int pos = gid; pos < size; pos += num_threads) {  // grid-stride
+    auto const& row = pos % height;  // pos enumerates entries column by column
+    auto const& col = pos / height;
+    func(x1[row + col * x1_ldim],  // any return value of func is discarded
+         x2[row + col * x2_ldim],
+         dy[row + col * dy_ldim],
+         dx1[row + col * dx1_ldim],  // dx1/dx2 entries are passed as lvalues
+         dx2[row + col * dx2_ldim]);
+  }
+}
+
+}// namespace kernel
+
+/** @brief Apply a binary functor entrywise: C(i,j) = func(A(i,j), B(i,j)).
+ *
+ *  @warning Calling this function is only valid in device-compiled code.
+ *
+ *  @tparam S (Inferred) Type of first source buffer.
+ *  @tparam T (Inferred) Type of second source buffer.
+ *  @tparam U (Inferred) Type of target buffer.
+ *  @tparam FunctorT (Inferred) Type of functor. Must be equivalent to
+ *                              `U(S const&, T const&)`.
+ *
+ *  @param A The first source matrix. Its height/width set the extent.
+ *  @param B The second source matrix.
+ *  @param C The target matrix.
+ *  @param func The functor to apply. Must be device-invocable.
+ */
+template <typename S, typename T, typename U, typename FunctorT>
+void EntrywiseZipInto(El::Matrix<S, El::Device::GPU> const& A,
+                      El::Matrix<T, El::Device::GPU> const& B,
+                      El::Matrix<U, El::Device::GPU>& C,
+                      FunctorT func)
+{
+  auto multisync = El::MakeMultiSync(gpu::get_sync_info(C),
+                                     gpu::get_sync_info(A),
+                                     gpu::get_sync_info(B));
+
+  auto const m = A.Height();  // NOTE(review): B and C sizes are not checked
+  auto const n = A.Width();
+  if (m == El::TypeTraits<El::Int>::Zero()
+      || n == El::TypeTraits<El::Int>::Zero())
+  {
+    // Nothing to do
+    return;
+  }
+
+  constexpr int TILE_DIM = El::gpu::Default2DTileSize();
+  constexpr int BLK_COLS = 8;
+
+  static_assert(TILE_DIM % BLK_COLS == 0,
+                "Incompatible TILE_DIM, BLK_COLS.");
+
+  dim3 blks((m + TILE_DIM - 1) / TILE_DIM,  // ceil(m / TILE_DIM) row tiles
+            (n + TILE_DIM - 1) / TILE_DIM,  // ceil(n / TILE_DIM) column tiles
+            1);
+  dim3 thds(TILE_DIM, BLK_COLS, 1);  // TILE_DIM x BLK_COLS threads per block
+
+  El::gpu::LaunchKernel(
+    kernel::entrywise_zip_into_kernel_naive<
+      TILE_DIM,
+      BLK_COLS,
+      El::NativeGPUType<S>,
+      El::NativeGPUType<T>,
+      El::NativeGPUType<U>,
+      El::Int,
+      FunctorT>,
+    blks, thds, 0, multisync,
+    m, n,
+    El::AsNativeGPUType(A.LockedBuffer()), A.LDim(),
+    El::AsNativeGPUType(B.LockedBuffer()), B.LDim(),
+    El::AsNativeGPUType(C.Buffer()), C.LDim(),
+    func);
+}
+
+
+/** Apply a binary backprop operator to GPU data.
+ *  The input and output data must be on GPU and must have the same
+ *  dimensions. Given a binary function \f$ y = f(x_1,x_2) \f$, the
+ *  corresponding BinaryBackPropOperator is a 5-ary function with the
+ *  arguments \f$ x_1 \f$, \f$ x_2 \f$, \f$ dL/dy \f$, \f$ dL/dx_1\f$,
+ *  \f$ dL/dx_2 \f$. The last two arguments should be overwritten when
+ *  the BinaryBackPropOperator is called.
+ */
+template <typename DataT, typename F>
+void apply_binary_backprop_operator(El::Matrix<DataT, El::Device::GPU> const& x1,
+                                    El::Matrix<DataT, El::Device::GPU> const& x2,
+                                    El::Matrix<DataT, El::Device::GPU> const& dy,
+                                    El::Matrix<DataT, El::Device::GPU>& dx1,
+                                    El::Matrix<DataT, El::Device::GPU>& dx2,
+                                    F func)
+{
+  // Get CUDA grid dimensions; the block count may be capped at UINT32_MAX.
+  // NOTE(review): the CUDA guide lists 2^31-1 as the max grid x-dim; confirm
+  // (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications).
+  El::Int const height = x1.Height();  // extent is taken from x1 only
+  El::Int const width = x1.Width();
+  El::Int const block_dim = 256;
+  El::Int grid_dim = (height * width + block_dim - 1) / block_dim;  // ceil-div
+  if (sizeof(El::Int) > sizeof(unsigned int)
+      && grid_dim > std::numeric_limits<uint32_t>::max()) {
+    grid_dim = std::numeric_limits<uint32_t>::max();
+  }
+
+  // Launch CUDA kernel
+  if (grid_dim > 0) {  // an empty x1 yields grid_dim == 0: nothing launched
+    auto multisync = El::MakeMultiSync(gpu::get_sync_info(dx2),
+                                       gpu::get_sync_info(dx1),
+                                       gpu::get_sync_info(dy),
+                                       gpu::get_sync_info(x2),
+                                       gpu::get_sync_info(x1));
+    hydrogen::gpu::LaunchKernel(
+      kernel::binary_backprop_operator_kernel<DataT, F>,
+      grid_dim, block_dim, 0, multisync,
+      height, width,
+      x1.LockedBuffer(), x1.LDim(),
+      x2.LockedBuffer(), x2.LDim(),
+      dy.LockedBuffer(), dy.LDim(),
+      dx1.Buffer(), dx1.LDim(),
+      dx2.Buffer(), dx2.LDim(),
+      func);
+  }
+}
+
+}// namespace internal
+}// namespace lbann
+#endif // defined __CUDACC__ || defined __HIPCC__
+#endif // LBANN_SRC_OPERATORS_MATH_COMMON_CUH_INCLUDED
diff --git a/src/operators/math/common.hpp b/src/operators/math/common.hpp
new file mode 100644
index 00000000000..06e4b1d26f0
--- /dev/null
+++ b/src/operators/math/common.hpp
@@ -0,0 +1,122 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+#ifndef LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED
+#define LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED
+
+#include "lbann/base.hpp"
+
+namespace lbann {
+namespace internal {
+
+/** @brief A binary entrywise map C(i,j) <- func(A(i,j), B(i,j)) on CPU.
+ *  B and C are expected to have the same height and width as A. */
+template <typename S, typename T, typename U, typename F>
+void EntrywiseZipInto(El::Matrix<S, El::Device::CPU> const& A,
+                      El::Matrix<T, El::Device::CPU> const& B,
+                      El::Matrix<U, El::Device::CPU>& C,
+                      F func)
+{
+  EL_DEBUG_CSE;
+  auto const m = A.Height();  // A defines the iteration extent
+  auto const n = A.Width();
+
+  LBANN_ASSERT_DEBUG(B.Height() == m);
+  LBANN_ASSERT_DEBUG(B.Width() == n);
+
+  LBANN_ASSERT_DEBUG(C.Height() == m);
+  LBANN_ASSERT_DEBUG(C.Width() == n);
+
+  S const* ABuf = A.LockedBuffer();
+  T const* BBuf = B.LockedBuffer();
+  U* CBuf = C.Buffer();
+
+  auto const ALDim = A.LDim();
+  auto const BLDim = B.LDim();
+  auto const CLDim = C.LDim();
+
+  // Use entry-wise parallelization for column vectors. Otherwise
+  // use column-wise parallelization.
+  if (n == 1) {
+    EL_PARALLEL_FOR
+    for (El::Int i = 0; i < m; ++i) {
+      CBuf[i] = func(ABuf[i], BBuf[i]);  // one column: LDim values unused
+    }
+  }
+  else {
+    EL_PARALLEL_FOR_COLLAPSE2
+    for (El::Int j = 0; j < n; ++j) {  // j: column index, i: row index
+      for (El::Int i = 0; i < m; ++i) {
+        CBuf[i + j * CLDim] = func(ABuf[i + j * ALDim], BBuf[i + j * BLDim]);
+      }
+    }
+  }
+}
+
+/** Apply a binary backprop operator to CPU data.
+ *  The input and output data must be on CPU and must have the same
+ *  dimensions. Given a binary function \f$ y = f(x_1,x_2) \f$, the
+ *  corresponding BinaryBackPropOperator is a 5-ary function with the
+ *  arguments \f$ x_1 \f$, \f$ x_2 \f$, \f$ dL/dy \f$, \f$ dL/dx_1\f$,
+ *  \f$ dL/dx_2 \f$. The last two arguments should be overwritten when
+ *  the BinaryBackPropOperator is called.
+ */
+template <typename DataT, typename F>
+void apply_binary_backprop_operator(
+  El::Matrix<DataT, El::Device::CPU> const& x1,
+  El::Matrix<DataT, El::Device::CPU> const& x2,
+  El::Matrix<DataT, El::Device::CPU> const& dy,
+  El::Matrix<DataT, El::Device::CPU>& dx1,
+  El::Matrix<DataT, El::Device::CPU>& dx2,
+  F f)
+{
+  if (x1.Contiguous() && x2.Contiguous() && dy.Contiguous() &&
+      dx1.Contiguous() && dx2.Contiguous()) {  // flat sweep over raw buffers
+    const auto* x1_buffer = x1.LockedBuffer();
+    const auto* x2_buffer = x2.LockedBuffer();
+    const auto* dy_buffer = dy.LockedBuffer();
+    auto* dx1_buffer = dx1.Buffer();
+    auto* dx2_buffer = dx2.Buffer();
+    const size_t size = x1.Height() * x1.Width();  // extent comes from x1 only
+    LBANN_OMP_PARALLEL_FOR
+    for (size_t i = 0; i < size; ++i) {
+      f(x1_buffer[i], x2_buffer[i], dy_buffer[i], dx1_buffer[i], dx2_buffer[i]);
+    }
+  }
+  else {  // some matrix is not contiguous: address entries by (row, column)
+    auto const width = x1.Width();
+    auto const height = x1.Height();
+    LBANN_OMP_PARALLEL_FOR_COLLAPSE2
+    for (El::Int jj = 0; jj < width; ++jj) {  // jj: column, ii: row
+      for (El::Int ii = 0; ii < height; ++ii) {
+        f(x1(ii, jj), x2(ii, jj), dy(ii, jj), dx1(ii, jj), dx2(ii, jj));
+      }
+    }
+  }
+}
+
+} // namespace internal
+} // namespace lbann
+#endif // LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED
diff --git a/src/operators/math/math_builders.cpp b/src/operators/math/math_builders.cpp
index d81ad3c23c6..279e47756c4 100644
--- a/src/operators/math/math_builders.cpp
+++ b/src/operators/math/math_builders.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -26,8 +26,69 @@
 
 #include <lbann/operators/math/math_builders_impl.hpp>
 
-namespace lbann {
+#define LBANN_ABS_OP_COMPLEX_ETI(D)                                            \
+  template std::unique_ptr<lbann::Operator<El::Complex<float>, float, D>>      \
+  lbann::build_abs_operator<El::Complex<float>, D>(                            \
+    lbann_data::Operator const&);                                              \
+  template std::unique_ptr<lbann::Operator<El::Complex<double>, double, D>>    \
+  lbann::build_abs_operator<El::Complex<double>, D>(                           \
+    lbann_data::Operator const&)
+LBANN_ABS_OP_COMPLEX_ETI(El::Device::CPU);
+#ifdef LBANN_HAS_GPU
+LBANN_ABS_OP_COMPLEX_ETI(El::Device::GPU);
+#endif
+#undef LBANN_ABS_OP_COMPLEX_ETI
+
+#define PROTO_DEVICE(T, D)                                                     \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(abs, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(acos, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(acosh, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(add, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(asin, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(asinh, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(atan, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(atanh, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(ceil, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(clamp, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(cos, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(cosh, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(divide, T, D);                        \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(equal, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(erf, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(erfinv, T, D);                        \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(exp, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(expm1, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(floor, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(greater, T, D);                       \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(greater_equal, T, D);                 \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(less, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(less_equal, T, D);                    \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(log, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(log1p, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(logical_and, T, D);                   \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(logical_not, T, D);                   \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(logical_or, T, D);                    \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(logical_xor, T, D);                   \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(max, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(min, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(mod, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(multiply, T, D);                      \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(negative, T, D);                      \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(not_equal, T, D);                     \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(pow, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(reciprocal, T, D);                    \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(round, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(rsqrt, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(safe_divide, T, D);                   \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(safe_reciprocal, T, D);               \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(sign, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(sin, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(sinh, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(sqrt, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(square, T, D);                        \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(squared_difference, T, D);            \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(subtract, T, D);                      \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(tan, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(tanh, T, D)
 
-#define PROTO_DEVICE(T, D) LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(clamp, T, D);
 #include <lbann/macros/instantiate_device.hpp>
-} // namespace lbann
diff --git a/src/operators/math/unary.cpp b/src/operators/math/unary.cpp
new file mode 100644
index 00000000000..e60810c27e3
--- /dev/null
+++ b/src/operators/math/unary.cpp
@@ -0,0 +1,567 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/unary.hpp"
+
+#include "common.hpp"
+
+namespace lbann {
+namespace {
+
+// Operator implementation objects for entry-wise unary operators
+//
+// Note: The unary operator() overload corresponds to the forward prop
+// step (\f$ y = f(x) \f$) and the binary operator() overload
+// corresponds to the back prop step
+// (\f$ \frac{dL}{dx} = \frac{dL}{dy} f'(x) \f$).
+
+// Logical not operator: zero and NaN inputs map to one, all else to zero.
+template <typename DataT>
+struct LogicalNotOpImpl
+{
+  DataT operator()(DataT const& x) const noexcept
+  {
+    const auto& b = x != El::TypeTraits<DataT>::Zero() && !std::isnan(x);
+    return !b ? El::TypeTraits<DataT>::One() : El::TypeTraits<DataT>::Zero();
+  }
+  DataT operator()(DataT const& /*x*/, DataT const& /*dy*/) const noexcept
+  {
+    return El::TypeTraits<DataT>::Zero();
+  }
+};
+
+// Negation operator: forward yields -x, back prop yields -dy.
+template <class DataT>
+struct NegativeOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return -x; }
+  DataT operator()(const DataT& /*x*/, const DataT& dy) const noexcept
+  {
+    return -dy;
+  }
+};
+
+// Sign operator: +1 for positive input, -1 for negative input, and
+// zero otherwise. The back prop gradient is always zero.
+template <class DataT>
+struct SignOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept
+  {
+    using Traits = El::TypeTraits<DataT>;
+    if (x > Traits::Zero()) {
+      return Traits::One();
+    }
+    if (x < Traits::Zero()) {
+      return -Traits::One();
+    }
+    return Traits::Zero();
+  }
+  DataT operator()(const DataT& /*x*/, const DataT& /*dy*/) const noexcept
+  {
+    return El::TypeTraits<DataT>::Zero();
+  }
+};
+
+// Rounding operator; its back prop gradient is identically zero.
+template <class DataT>
+struct RoundOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept
+  {
+    using std::round; // unqualified call below, so ADL overloads also apply
+    return round(x);
+  }
+  DataT operator()(const DataT& /*x*/, const DataT& /*dy*/) const noexcept
+  {
+    return El::TypeTraits<DataT>::Zero();
+  }
+};
+
+// Ceiling operator; its back prop gradient is identically zero.
+template <class DataT>
+struct CeilOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept
+  {
+    using std::ceil; // unqualified call below, so ADL overloads also apply
+    return ceil(x);
+  }
+  DataT operator()(const DataT& /*x*/, const DataT& /*dy*/) const noexcept
+  {
+    return El::TypeTraits<DataT>::Zero();
+  }
+};
+
+// Floor operator; its back prop gradient is identically zero.
+template <class DataT>
+struct FloorOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept
+  {
+    using std::floor; // unqualified call below, so ADL overloads also apply
+    return floor(x);
+  }
+  DataT operator()(const DataT& /*x*/, const DataT& /*dy*/) const noexcept
+  {
+    return El::TypeTraits<DataT>::Zero();
+  }
+};
+
+/** Reciprocal operator.
+ *  Forward prop computes 1/x with no guard against a zero input (the
+ *  guarded variant is SafeReciprocalOpImpl).
+ */
+template <typename DataT>
+struct ReciprocalOpImpl
+{
+  DataT operator()(DataT const& x) const noexcept
+  {
+    return El::To<DataT>(1) / x;
+  }
+  DataT operator()(DataT const& x, DataT const& dy) const noexcept
+  {
+    if (dy == El::TypeTraits<DataT>::Zero()) {
+      return El::TypeTraits<DataT>::Zero();
+    }
+    else {
+      return -dy / (x * x);
+    }
+  }
+};
+
+// Square operator: y = x^2, so dL/dx = 2 * x * dL/dy.
+template <class DataT>
+struct SquareOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return x * x; }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return El::To<DataT>(2) * x * dy;
+  }
+};
+
+// Square root operator: dL/dx = dL/dy / (2 * sqrt(x)).
+template <class DataT>
+struct SqrtOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Sqrt(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy / (El::To<DataT>(2) * El::Sqrt(x));
+  }
+};
+
+// Reciprocal square root operator: dL/dx = -dL/dy / (2 * x * sqrt(x)).
+template <class DataT>
+struct RsqrtOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept
+  {
+    return El::To<DataT>(1) / El::Sqrt(x);
+  }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    const auto root = El::Sqrt(x);
+    return -dy / (El::To<DataT>(2) * x * root);
+  }
+};
+
+/** Safe reciprocal operator.
+ *  If a standard reciprocal produces an infinity or NaN,
+ *  El::TypeTraits<DataT>::Zero() is output instead. Back prop
+ *  likewise outputs zero for such inputs.
+ */
+template <class DataT>
+struct SafeReciprocalOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept
+  {
+    const auto inv = El::To<DataT>(1) / x;
+    if (!std::isfinite(inv)) {
+      return El::TypeTraits<DataT>::Zero();
+    }
+    return inv;
+  }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    const auto inv = El::To<DataT>(1) / x;
+    if (!std::isfinite(inv)) {
+      return El::TypeTraits<DataT>::Zero();
+    }
+    return -dy * inv * inv;
+  }
+};
+
+// Exponential operator: y = exp(x), so dL/dx = dL/dy * exp(x).
+template <class DataT>
+struct ExpOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Exp(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy * El::Exp(x);
+  }
+};
+
+// Exponential minus one operator: dL/dx = dL/dy * exp(x).
+template <class DataT>
+struct Expm1OpImpl
+{
+  DataT operator()(const DataT& x) const noexcept
+  {
+    using std::expm1; // unqualified call below, so ADL overloads also apply
+    return expm1(x);
+  }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy * El::Exp(x);
+  }
+};
+
+// Natural logarithm operator: y = log(x), so dL/dx = dL/dy / x.
+template <class DataT>
+struct LogOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Log(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy / x;
+  }
+};
+
+// Natural logarithm one plus operator: dL/dx = dL/dy / (x + 1).
+template <class DataT>
+struct Log1pOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept
+  {
+    using std::log1p; // unqualified call below, so ADL overloads also apply
+    return log1p(x);
+  }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy / (x + El::TypeTraits<DataT>::One());
+  }
+};
+
+// Cosine operator: y = cos(x), so dL/dx = -dL/dy * sin(x).
+template <class DataT>
+struct CosOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Cos(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return -dy * El::Sin(x);
+  }
+};
+
+// Sine operator: y = sin(x), so dL/dx = dL/dy * cos(x).
+template <class DataT>
+struct SinOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Sin(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy * El::Cos(x);
+  }
+};
+
+// Tangent operator: y = tan(x), so dL/dx = dL/dy / cos(x)^2.
+template <class DataT>
+struct TanOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Tan(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    const auto cos_x = El::Cos(x);
+    return dy / (cos_x * cos_x);
+  }
+};
+
+// Arccosine operator: dL/dx = -dL/dy / sqrt(1 - x^2).
+template <class DataT>
+struct AcosOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Acos(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return -dy / El::Sqrt(El::TypeTraits<DataT>::One() - x * x);
+  }
+};
+
+// Arcsine operator: dL/dx = dL/dy / sqrt(1 - x^2).
+template <class DataT>
+struct AsinOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Asin(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy / El::Sqrt(El::TypeTraits<DataT>::One() - x * x);
+  }
+};
+
+// Arctangent operator: dL/dx = dL/dy / (1 + x^2).
+template <class DataT>
+struct AtanOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Atan(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy / (El::TypeTraits<DataT>::One() + x * x);
+  }
+};
+
+// Hyperbolic cosine operator: dL/dx = dL/dy * sinh(x).
+template <class DataT>
+struct CoshOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Cosh(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy * El::Sinh(x);
+  }
+};
+
+// Hyperbolic sine operator: dL/dx = dL/dy * cosh(x).
+template <class DataT>
+struct SinhOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Sinh(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy * El::Cosh(x);
+  }
+};
+
+// Hyperbolic tangent operator: dL/dx = dL/dy / cosh(x)^2.
+template <class DataT>
+struct TanhOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Tanh(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    const auto cosh_x = El::Cosh(x);
+    return dy / (cosh_x * cosh_x);
+  }
+};
+
+// Hyperbolic arccosine operator: d/dx acosh(x) = 1 / sqrt(x^2 - 1) (positive).
+template <typename DataT>
+struct AcoshOpImpl
+{
+  DataT operator()(DataT const& x) const noexcept { return El::Acosh(x); }
+  DataT operator()(DataT const& x, DataT const& dy) const noexcept
+  {
+    return dy / (El::Sqrt(x - El::TypeTraits<DataT>::One()) *
+                 El::Sqrt(x + El::TypeTraits<DataT>::One()));
+  }
+};
+
+// Hyperbolic arcsine operator: dL/dx = dL/dy / sqrt(1 + x^2).
+template <class DataT>
+struct AsinhOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Asinh(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy / El::Sqrt(El::TypeTraits<DataT>::One() + x * x);
+  }
+};
+
+// Hyperbolic arctangent operator: dL/dx = dL/dy / (1 - x^2).
+template <class DataT>
+struct AtanhOpImpl
+{
+  DataT operator()(const DataT& x) const noexcept { return El::Atanh(x); }
+  DataT operator()(const DataT& x, const DataT& dy) const noexcept
+  {
+    return dy / (El::TypeTraits<DataT>::One() - x * x);
+  }
+};
+
+// Error function operator.
+template <typename DataT>
+struct ErfOpImpl
+{
+  DataT operator()(DataT const& x) const noexcept
+  {
+    return El::To<DataT>(std::erf(El::To<double>(x)));
+  }
+  DataT operator()(DataT const& x, DataT const& dy) const noexcept
+  {
+    const auto two_rsqrt_pi = El::To<DataT>(1.12837916709551257389);
+    return dy * two_rsqrt_pi * El::Exp(-x * x);
+  }
+};
+
+// Inverse error function operator.
+template <typename DataT>
+struct ErfInvOpImpl
+{
+  // Forward prop: y = erfinv(x), found by solving erf(y) = x in double.
+  DataT operator()(DataT const& x) const noexcept
+  {
+
+    // Trivial cases: inputs at or beyond -1/+1 map to -infinity/+infinity
+    const DataT inf = std::numeric_limits<DataT>::infinity();
+    if (x <= -El::TypeTraits<DataT>::One()) {
+      return -inf;
+    }
+    if (x >= El::TypeTraits<DataT>::One()) {
+      return inf;
+    }
+
+    // Apply Newton's method to erf(y) - x = 0, starting from y = x
+    const double x_ = El::To<double>(x);
+    double y = x_;
+    constexpr double half_sqrt_pi = 0.88622692545275801364; // sqrt(pi)/2
+    constexpr double eps = std::numeric_limits<double>::epsilon();
+    constexpr int max_iters = 50;
+    for (int iter = 0; iter < max_iters; ++iter) {
+      const double err = std::erf(y) - x_;
+      if (std::isinf(y) || std::abs(err) < eps) {
+        break;
+      }
+      y -= err * half_sqrt_pi * std::exp(y * y); // y -= err / erf'(y)
+    }
+    return El::To<DataT>(y);
+  }
+  // Back prop: zero for |x| >= 1, else dy * (sqrt(pi)/2) * exp(erfinv(x)^2).
+  DataT operator()(DataT const& x, DataT const& dy) const noexcept
+  {
+    if (El::Abs(x) >= El::TypeTraits<DataT>::One()) {
+      return El::TypeTraits<DataT>::Zero();
+    }
+    else {
+      const auto half_sqrt_pi = El::To<DataT>(0.88622692545275801364);
+      const auto y = this->operator()(x);
+      return dy * half_sqrt_pi * El::Exp(y * y);
+    }
+  }
+};
+
+} // namespace
+
+// Define fp_compute_local and bp_compute_local for each unary operator
+#define DEFINE_COMPUTE_OPS(OP_NAME)                                            \
+  template <typename DataT, El::Device Device>                                 \
+  void OP_NAME##Operator<DataT, Device>::fp_compute_local(                     \
+    std::vector<ConstLocalInputTensorType> inputs,                             \
+    std::vector<LocalOutputTensorType> outputs) const                          \
+  {                                                                            \
+    LBANN_ASSERT_DEBUG(inputs.size() == 1);                                    \
+    LBANN_ASSERT_DEBUG(outputs.size() == 1);                                   \
+    auto const& input = inputs.front().data();                                 \
+    auto& output = outputs.front().data();                                     \
+    El::EntrywiseMap(                                                          \
+      input,                                                                   \
+      output,                                                                  \
+      std::function<DataT(DataT const&)>(OP_NAME##OpImpl<DataT>{}));           \
+  }                                                                            \
+  template <typename DataT, El::Device Device>                                 \
+  void OP_NAME##Operator<DataT, Device>::bp_compute_local(                     \
+    std::vector<ConstLocalInputTensorType> inputs,                             \
+    std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,                 \
+    std::vector<LocalInputTensorType> grads_wrt_inputs) const                  \
+  {                                                                            \
+    LBANN_ASSERT_DEBUG(inputs.size() == 1);                                    \
+    LBANN_ASSERT_DEBUG(grads_wrt_outputs.size() == 1);                         \
+    LBANN_ASSERT_DEBUG(grads_wrt_inputs.size() == 1);                          \
+    auto const& input = inputs.front().data();                                 \
+    auto const& grad_wrt_output = grads_wrt_outputs.front().data();            \
+    auto& grad_wrt_input = grads_wrt_inputs.front().data();                    \
+    internal::EntrywiseZipInto(input,                                          \
+                               grad_wrt_output,                                \
+                               grad_wrt_input,                                 \
+                               OP_NAME##OpImpl<DataT>{});                      \
+  }
+
+DEFINE_COMPUTE_OPS(Acos)
+DEFINE_COMPUTE_OPS(Acosh)
+DEFINE_COMPUTE_OPS(Asin)
+DEFINE_COMPUTE_OPS(Asinh)
+DEFINE_COMPUTE_OPS(Atan)
+DEFINE_COMPUTE_OPS(Atanh)
+DEFINE_COMPUTE_OPS(Ceil)
+DEFINE_COMPUTE_OPS(Cos)
+DEFINE_COMPUTE_OPS(Cosh)
+DEFINE_COMPUTE_OPS(Erf)
+DEFINE_COMPUTE_OPS(ErfInv)
+DEFINE_COMPUTE_OPS(Exp)
+DEFINE_COMPUTE_OPS(Expm1)
+DEFINE_COMPUTE_OPS(Floor)
+DEFINE_COMPUTE_OPS(Log)
+DEFINE_COMPUTE_OPS(Log1p)
+DEFINE_COMPUTE_OPS(LogicalNot)
+DEFINE_COMPUTE_OPS(Negative)
+DEFINE_COMPUTE_OPS(Reciprocal)
+DEFINE_COMPUTE_OPS(Round)
+DEFINE_COMPUTE_OPS(Rsqrt)
+DEFINE_COMPUTE_OPS(SafeReciprocal)
+DEFINE_COMPUTE_OPS(Sign)
+DEFINE_COMPUTE_OPS(Sin)
+DEFINE_COMPUTE_OPS(Sinh)
+DEFINE_COMPUTE_OPS(Sqrt)
+DEFINE_COMPUTE_OPS(Square)
+DEFINE_COMPUTE_OPS(Tan)
+DEFINE_COMPUTE_OPS(Tanh)
+
+#define PROTO(T)                                                               \
+  template class AcosOperator<T, El::Device::CPU>;                             \
+  template class AcoshOperator<T, El::Device::CPU>;                            \
+  template class AsinOperator<T, El::Device::CPU>;                             \
+  template class AsinhOperator<T, El::Device::CPU>;                            \
+  template class AtanOperator<T, El::Device::CPU>;                             \
+  template class AtanhOperator<T, El::Device::CPU>;                            \
+  template class CeilOperator<T, El::Device::CPU>;                             \
+  template class CosOperator<T, El::Device::CPU>;                              \
+  template class CoshOperator<T, El::Device::CPU>;                             \
+  template class ErfInvOperator<T, El::Device::CPU>;                           \
+  template class ErfOperator<T, El::Device::CPU>;                              \
+  template class ExpOperator<T, El::Device::CPU>;                              \
+  template class Expm1Operator<T, El::Device::CPU>;                            \
+  template class FloorOperator<T, El::Device::CPU>;                            \
+  template class Log1pOperator<T, El::Device::CPU>;                            \
+  template class LogOperator<T, El::Device::CPU>;                              \
+  template class LogicalNotOperator<T, El::Device::CPU>;                       \
+  template class NegativeOperator<T, El::Device::CPU>;                         \
+  template class ReciprocalOperator<T, El::Device::CPU>;                       \
+  template class RoundOperator<T, El::Device::CPU>;                            \
+  template class RsqrtOperator<T, El::Device::CPU>;                            \
+  template class SafeReciprocalOperator<T, El::Device::CPU>;                   \
+  template class SignOperator<T, El::Device::CPU>;                             \
+  template class SinOperator<T, El::Device::CPU>;                              \
+  template class SinhOperator<T, El::Device::CPU>;                             \
+  template class SqrtOperator<T, El::Device::CPU>;                             \
+  template class SquareOperator<T, El::Device::CPU>;                           \
+  template class TanOperator<T, El::Device::CPU>;                              \
+  template class TanhOperator<T, El::Device::CPU>
+// NOTE(review): instantiate.hpp presumably expands PROTO(T) per data type -- confirm
+#define LBANN_INSTANTIATE_CPU_HALF
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
+
+} // namespace lbann
diff --git a/src/operators/math/unary.cu b/src/operators/math/unary.cu
new file mode 100644
index 00000000000..d5879a3b445
--- /dev/null
+++ b/src/operators/math/unary.cu
@@ -0,0 +1,486 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/unary.hpp"
+
+#include "lbann/base.hpp"
+#include "lbann/utils/gpu/helpers.hpp"
+
+#include "common.cuh"
+
+namespace lbann {
+
+namespace {
+
+// =========================================================
+// Implementation objects for entry-wise unary operators
+// =========================================================
+// Note: The unary operator() overload corresponds to the forward
+// prop step (\f$ y = f(x) \f$) and the binary operator() overload
+// corresponds to the back prop step
+// (\f$ \frac{dL}{dx} = \frac{dL}{dy} f'(x) \f$).
+
+/** Logical not: outputs one for zero or NaN input, zero otherwise. */
+template <class DataT>
+struct LogicalNotOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    const bool is_true = x != DataT(0.0) && !gpu_lib::isnan(x);
+    return is_true ? DataT(0.0) : DataT(1.0);
+  }
+  inline __device__ DataT operator()(const DataT& /*x*/, const DataT& /*dy*/) const {
+    return DataT(0.0);
+  }
+};
+
+/** Negation operator: forward yields -x, back prop yields -dy. */
+template <class DataT>
+struct NegativeOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return -x;
+  }
+  inline __device__ DataT operator()(const DataT& /*x*/, const DataT& dy) const {
+    return -dy;
+  }
+};
+
+/** Sign operator: +1 for positive, -1 for negative, zero otherwise. */
+template <class DataT>
+struct SignOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    const DataT zero = 0.;
+    const DataT one = 1.;
+    if (x > zero) { return one; }
+    if (x < zero) { return -one; }
+    return zero;
+  }
+  inline __device__ DataT operator()(const DataT& /*x*/, const DataT& /*dy*/) const {
+    return DataT(0.0);
+  }
+};
+
+/** Rounding operator; its back prop gradient is identically zero. */
+template <class DataT>
+struct RoundOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::round(x);
+  }
+  inline __device__ DataT operator()(const DataT& /*x*/, const DataT& /*dy*/) const {
+    return DataT(0.0);
+  }
+};
+
+/** Ceiling operator; its back prop gradient is identically zero. */
+template <class DataT>
+struct CeilOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::ceil(x);
+  }
+  inline __device__ DataT operator()(const DataT& /*x*/, const DataT& /*dy*/) const {
+    return DataT(0.0);
+  }
+};
+
+/** Floor operator; its back prop gradient is identically zero. */
+template <class DataT>
+struct FloorOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::floor(x);
+  }
+  inline __device__ DataT operator()(const DataT& /*x*/, const DataT& /*dy*/) const {
+    return DataT(0.0);
+  }
+};
+
+/** Reciprocal operator: y = 1/x and dL/dx = -dL/dy / x^2. */
+template <class DataT>
+struct ReciprocalOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return DataT(1.) / x;
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    // A zero upstream gradient short-circuits to zero regardless of x.
+    if (dy == DataT(0.0)) { return DataT(0.0); }
+    return - dy / (x*x);
+  }
+};
+
+/** Square operator: y = x^2, so dL/dx = 2 * x * dL/dy. */
+template <class DataT>
+struct SquareOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return x*x;
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return DataT(2.) * x * dy;
+  }
+};
+
+
+/** Square root operator: dL/dx = dL/dy / (2 * sqrt(x)). */
+template <class DataT>
+struct SqrtOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::sqrt(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy / (DataT(2.) * gpu_lib::sqrt(x));
+  }
+};
+
+/** Reciprocal square root: dL/dx = -dL/dy / (2 * x * sqrt(x)). */
+template <class DataT>
+struct RsqrtOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::rsqrt(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    const auto root = gpu_lib::sqrt(x);
+    return - dy / (DataT(2.) * x * root);
+  }
+};
+
+/** Safe reciprocal operator.
+ *  Whenever 1/x is not finite (an infinity or NaN), zero is output
+ *  instead, in both forward and back prop.
+ */
+template <class DataT>
+struct SafeReciprocalOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    const auto inv = DataT(1.) / x;
+    if (!gpu_lib::isfinite(inv)) { return DataT(0.0); }
+    return inv;
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    const auto inv = DataT(1.) / x;
+    if (!gpu_lib::isfinite(inv)) { return DataT(0.0); }
+    return - dy * inv*inv;
+  }
+};
+
+/** Exponential operator: y = exp(x), so dL/dx = dL/dy * exp(x). */
+template <class DataT>
+struct ExpOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::exp(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy * gpu_lib::exp(x);
+  }
+};
+
+/** Exponential minus one operator: dL/dx = dL/dy * exp(x). */
+template <class DataT>
+struct Expm1OpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::expm1(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy * gpu_lib::exp(x);
+  }
+};
+
+/** Natural logarithm operator: y = log(x), so dL/dx = dL/dy / x. */
+template <class DataT>
+struct LogOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::log(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy / x;
+  }
+};
+
+/** Natural logarithm one plus operator: dL/dx = dL/dy / (x + 1). */
+template <class DataT>
+struct Log1pOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::log1p(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy / (x + DataT(1.0));
+  }
+};
+
+/** Cosine operator: y = cos(x), so dL/dx = -dL/dy * sin(x). */
+template <class DataT>
+struct CosOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::cos(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return -dy * gpu_lib::sin(x);
+  }
+};
+
+/** Sine operator: y = sin(x), so dL/dx = dL/dy * cos(x). */
+template <class DataT>
+struct SinOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::sin(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy * gpu_lib::cos(x);
+  }
+};
+
+/** Tangent operator: y = tan(x), so dL/dx = dL/dy / cos(x)^2. */
+template <class DataT>
+struct TanOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::tan(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    const auto cos_x = gpu_lib::cos(x);
+    return dy / (cos_x*cos_x);
+  }
+};
+
+/** Arccosine operator: dL/dx = -dL/dy / sqrt(1 - x^2). */
+template <class DataT>
+struct AcosOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::acos(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return -dy / gpu_lib::sqrt(DataT(1.0) - x*x);
+  }
+};
+
+/** Arcsine operator: dL/dx = dL/dy / sqrt(1 - x^2). */
+template <class DataT>
+struct AsinOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::asin(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy / gpu_lib::sqrt(DataT(1.0) - x*x);
+  }
+};
+
+/** Arctangent operator: dL/dx = dL/dy / (1 + x^2). */
+template <class DataT>
+struct AtanOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::atan(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy / (DataT(1.0) + x*x);
+  }
+};
+
+/** Hyperbolic cosine operator: dL/dx = dL/dy * sinh(x). */
+template <class DataT>
+struct CoshOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::cosh(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy * gpu_lib::sinh(x);
+  }
+};
+
+/** Hyperbolic sine operator: dL/dx = dL/dy * cosh(x). */
+template <class DataT>
+struct SinhOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::sinh(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy * gpu_lib::cosh(x);
+  }
+};
+
+/** Hyperbolic tangent operator: dL/dx = dL/dy / cosh(x)^2. */
+template <class DataT>
+struct TanhOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::tanh(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    const auto cosh_x = gpu_lib::cosh(x);
+    return dy / (cosh_x*cosh_x);
+  }
+};
+
+/** Hyperbolic arccosine operator: d/dx acosh(x) = 1 / sqrt(x^2 - 1). */
+template <typename DataT>
+struct AcoshOpImpl {
+  inline __device__ DataT operator()(DataT const& x) const {
+    return gpu_lib::acosh(x);
+  }
+  inline __device__ DataT operator()(DataT const& x, DataT const& dy) const {
+    return dy / (gpu_lib::sqrt(x - DataT(1.0)) * gpu_lib::sqrt(x + DataT(1.0)));
+  }
+};
+
+/** Hyperbolic arcsine operator: dL/dx = dL/dy / sqrt(1 + x^2). */
+template <class DataT>
+struct AsinhOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::asinh(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy / gpu_lib::sqrt(DataT(1.0) + x*x);
+  }
+};
+
+/** Hyperbolic arctangent operator: dL/dx = dL/dy / (1 - x^2). */
+template <class DataT>
+struct AtanhOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::atanh(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    return dy / (DataT(1.0) - x*x);
+  }
+};
+
+/** Error function operator: dL/dx = dL/dy * (2/sqrt(pi)) * exp(-x^2). */
+template <class DataT>
+struct ErfOpImpl {
+  inline __device__ DataT operator()(const DataT& x) const {
+    return gpu_lib::erf(x);
+  }
+  inline __device__ DataT operator()(const DataT& x, const DataT& dy) const {
+    const DataT scale(1.12837916709551257389); // 2/sqrt(pi)
+    return dy * scale * gpu_lib::exp(-x*x);
+  }
+};
+
+/** Inverse error function operator. NOTE(review): back prop has no |x| >= 1 guard, unlike the CPU version in unary.cpp -- confirm intended. */
+template <typename DataT>
+struct ErfInvOpImpl {
+  inline __device__ DataT operator()(DataT const& x) const {
+    return gpu_lib::erfinv(x);
+  }
+  inline __device__ DataT operator()(DataT const& x, DataT const& dy) const {
+    DataT const half_sqrt_pi(0.88622692545275801364); // sqrt(pi)/2
+    auto const& y = gpu_lib::erfinv(x);
+    return dy * half_sqrt_pi * gpu_lib::exp(y*y); // dy / erf'(y)
+  }
+};
+
+} // namespace
+
+// Define fp_compute_local and bp_compute_local for each unary operator
+#define DEFINE_COMPUTE_OPS(OP_NAME)                                            \
+  template <typename DataT, El::Device Device>                                 \
+  void OP_NAME##Operator<DataT, Device>::fp_compute_local(                     \
+    std::vector<ConstLocalInputTensorType> inputs,                             \
+    std::vector<LocalOutputTensorType> outputs) const                          \
+  {                                                                            \
+    LBANN_ASSERT_DEBUG(inputs.size() == 1);                                    \
+    LBANN_ASSERT_DEBUG(outputs.size() == 1);                                   \
+    auto const& input = inputs.front().data();                                 \
+    auto& output = outputs.front().data();                                     \
+    El::EntrywiseMap(input,                                                    \
+                     output,                                                   \
+                     OP_NAME##OpImpl<DataT>{});                                \
+  }                                                                            \
+  template <typename DataT, El::Device Device>                                 \
+  void OP_NAME##Operator<DataT, Device>::bp_compute_local(                     \
+    std::vector<ConstLocalInputTensorType> inputs,                             \
+    std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,                 \
+    std::vector<LocalInputTensorType> grads_wrt_inputs) const                  \
+  {                                                                            \
+    LBANN_ASSERT_DEBUG(inputs.size() == 1);                                    \
+    LBANN_ASSERT_DEBUG(grads_wrt_outputs.size() == 1);                         \
+    LBANN_ASSERT_DEBUG(grads_wrt_inputs.size() == 1);                          \
+    auto const& input = inputs.front().data();                                 \
+    auto const& grad_wrt_output = grads_wrt_outputs.front().data();            \
+    auto& grad_wrt_input = grads_wrt_inputs.front().data();                    \
+    internal::EntrywiseZipInto(input,                                          \
+                               grad_wrt_output,                                \
+                               grad_wrt_input,                                 \
+                               OP_NAME##OpImpl<DataT>{});                      \
+  }
+
+DEFINE_COMPUTE_OPS(Acos)
+DEFINE_COMPUTE_OPS(Acosh)
+DEFINE_COMPUTE_OPS(Asin)
+DEFINE_COMPUTE_OPS(Asinh)
+DEFINE_COMPUTE_OPS(Atan)
+DEFINE_COMPUTE_OPS(Atanh)
+DEFINE_COMPUTE_OPS(Ceil)
+DEFINE_COMPUTE_OPS(Cos)
+DEFINE_COMPUTE_OPS(Cosh)
+DEFINE_COMPUTE_OPS(Erf)
+DEFINE_COMPUTE_OPS(ErfInv)
+DEFINE_COMPUTE_OPS(Exp)
+DEFINE_COMPUTE_OPS(Expm1)
+DEFINE_COMPUTE_OPS(Floor)
+DEFINE_COMPUTE_OPS(Log)
+DEFINE_COMPUTE_OPS(Log1p)
+DEFINE_COMPUTE_OPS(LogicalNot)
+DEFINE_COMPUTE_OPS(Negative)
+DEFINE_COMPUTE_OPS(Reciprocal)
+DEFINE_COMPUTE_OPS(Round)
+DEFINE_COMPUTE_OPS(Rsqrt)
+DEFINE_COMPUTE_OPS(SafeReciprocal)
+DEFINE_COMPUTE_OPS(Sign)
+DEFINE_COMPUTE_OPS(Sin)
+DEFINE_COMPUTE_OPS(Sinh)
+DEFINE_COMPUTE_OPS(Sqrt)
+DEFINE_COMPUTE_OPS(Square)
+DEFINE_COMPUTE_OPS(Tan)
+DEFINE_COMPUTE_OPS(Tanh)
+
+#define PROTO(T)                                                \
+  template class AcosOperator<T, El::Device::GPU>;              \
+  template class AcoshOperator<T, El::Device::GPU>;             \
+  template class AsinOperator<T, El::Device::GPU>;              \
+  template class AsinhOperator<T, El::Device::GPU>;             \
+  template class AtanOperator<T, El::Device::GPU>;              \
+  template class AtanhOperator<T, El::Device::GPU>;             \
+  template class CeilOperator<T, El::Device::GPU>;              \
+  template class CosOperator<T, El::Device::GPU>;               \
+  template class CoshOperator<T, El::Device::GPU>;              \
+  template class ErfInvOperator<T, El::Device::GPU>;            \
+  template class ErfOperator<T, El::Device::GPU>;               \
+  template class ExpOperator<T, El::Device::GPU>;               \
+  template class Expm1Operator<T, El::Device::GPU>;             \
+  template class FloorOperator<T, El::Device::GPU>;             \
+  template class Log1pOperator<T, El::Device::GPU>;             \
+  template class LogOperator<T, El::Device::GPU>;               \
+  template class LogicalNotOperator<T, El::Device::GPU>;        \
+  template class NegativeOperator<T, El::Device::GPU>;          \
+  template class ReciprocalOperator<T, El::Device::GPU>;        \
+  template class RoundOperator<T, El::Device::GPU>;             \
+  template class RsqrtOperator<T, El::Device::GPU>;             \
+  template class SafeReciprocalOperator<T, El::Device::GPU>;    \
+  template class SignOperator<T, El::Device::GPU>;              \
+  template class SinOperator<T, El::Device::GPU>;               \
+  template class SinhOperator<T, El::Device::GPU>;              \
+  template class SqrtOperator<T, El::Device::GPU>;              \
+  template class SquareOperator<T, El::Device::GPU>;            \
+  template class TanOperator<T, El::Device::GPU>;               \
+  template class TanhOperator<T, El::Device::GPU>
+
+// NOTE(review): instantiate.hpp presumably expands PROTO(T) per data type -- confirm
+#define LBANN_INSTANTIATE_GPU_HALF
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/operators/math/unit_test/CMakeLists.txt b/src/operators/math/unit_test/CMakeLists.txt
index c9457c3ae57..2bc416c3865 100644
--- a/src/operators/math/unit_test/CMakeLists.txt
+++ b/src/operators/math/unit_test/CMakeLists.txt
@@ -1,5 +1,11 @@
 set_full_path(THIS_DIR_MPI_CATCH2_TEST_FILES
+  abs_test.cpp
+  add_test.cpp
   clamp_test.cpp
+  cos_test.cpp
+  multiply_test.cpp
+  sin_test.cpp
+  subtract_test.cpp
   )
 
 set(LBANN_MPI_CATCH2_TEST_FILES
diff --git a/src/operators/math/unit_test/OperatorTraits.hpp b/src/operators/math/unit_test/OperatorTraits.hpp
new file mode 100644
index 00000000000..834e7c82753
--- /dev/null
+++ b/src/operators/math/unit_test/OperatorTraits.hpp
@@ -0,0 +1,100 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+#ifndef LBANN_SRC_OPERATORS_MATH_UNIT_TEST_OPERATORTRAITS_HPP_INCLUDED
+#define LBANN_SRC_OPERATORS_MATH_UNIT_TEST_OPERATORTRAITS_HPP_INCLUDED
+
+#include <lbann/operators/operator.hpp>
+
+namespace lbann {
+
+/** @brief The data type for data-parallel computation */
+template <typename T, El::Device D>
+using DataParallelMatrixType =
+  El::DistMatrix<T, El::Dist::STAR, El::Dist::VC, El::DistWrap::ELEMENT, D>;
+template <typename T, El::Device D>
+using ModelParallelMatrixType =
+  El::DistMatrix<T, El::Dist::MC, El::Dist::MR, El::DistWrap::ELEMENT, D>;
+
+template <typename OpT>
+struct OperatorTraits;
+
+template <typename InputT, typename OutputT, El::Device D>
+struct OperatorTraits<Operator<InputT, OutputT, D>>
+{
+  using input_value_type = InputT;
+  using output_value_type = OutputT;
+  using base_type = Operator<InputT, OutputT, D>;
+  using input_data_parallel_mat_type = DataParallelMatrixType<InputT, D>;
+  using output_data_parallel_mat_type = DataParallelMatrixType<OutputT, D>;
+  using input_model_parallel_mat_type = ModelParallelMatrixType<InputT, D>;
+  using output_model_parallel_mat_type = ModelParallelMatrixType<OutputT, D>;
+  using input_tensor_type = utils::DistTensorView<InputT, D>;
+  using output_tensor_type = utils::DistTensorView<OutputT, D>;
+  using input_const_tensor_type = utils::ConstDistTensorView<InputT, D>;
+  using output_const_tensor_type = utils::ConstDistTensorView<OutputT, D>;
+  static constexpr El::Device device = D;
+};
+
+template <typename OpT>
+constexpr El::Device Device = OperatorTraits<OpT>::device;
+
+template <typename OpT>
+using InputValueType = typename OperatorTraits<OpT>::input_value_type;
+template <typename OpT>
+using OutputValueType = typename OperatorTraits<OpT>::output_value_type;
+
+template <typename OpT>
+using BaseOperatorType = typename OperatorTraits<OpT>::base_type;
+
+template <typename OpT>
+using InputDataParallelMatType =
+  typename OperatorTraits<OpT>::input_data_parallel_mat_type;
+template <typename OpT>
+using OutputDataParallelMatType =
+  typename OperatorTraits<OpT>::output_data_parallel_mat_type;
+
+template <typename OpT>
+using InputModelParallelMatType =
+  typename OperatorTraits<OpT>::input_model_parallel_mat_type;
+template <typename OpT>
+using OutputModelParallelMatType =
+  typename OperatorTraits<OpT>::output_model_parallel_mat_type;
+
+template <typename OpT>
+using InputTensorType = typename OperatorTraits<OpT>::input_tensor_type;
+template <typename OpT>
+using OutputTensorType = typename OperatorTraits<OpT>::output_tensor_type;
+
+template <typename OpT>
+using InputConstTensorType =
+  typename OperatorTraits<OpT>::input_const_tensor_type;
+template <typename OpT>
+using OutputConstTensorType =
+  typename OperatorTraits<OpT>::output_const_tensor_type;
+
+} // namespace lbann
+
+#endif // LBANN_SRC_OPERATORS_MATH_UNIT_TEST_OPERATORTRAITS_HPP_INCLUDED
diff --git a/src/operators/math/unit_test/abs_test.cpp b/src/operators/math/unit_test/abs_test.cpp
new file mode 100644
index 00000000000..0a8962fab74
--- /dev/null
+++ b/src/operators/math/unit_test/abs_test.cpp
@@ -0,0 +1,313 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/abs.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include "lbann/proto/operator_factory_impl.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+using namespace lbann;
+
+// Define the list of operators to test: {float,double}x{CPU,GPU}, plus
+// the El::Complex variants of both unless building with LBANN_HAS_ROCM.
+template <typename T>
+using AbsOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  AbsOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  AbsOperator<T, El::Device::CPU>>;
+
+using AllAbsOpTypes =
+  h2::meta::tlist::Append<
+#if !defined LBANN_HAS_ROCM
+  AbsOperatorAllDevices<El::Complex<float>>,
+  AbsOperatorAllDevices<El::Complex<double>>,
+#endif // !LBANN_HAS_ROCM
+  AbsOperatorAllDevices<float>,
+  AbsOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<AbsOperator<T, D>>
+  : OperatorTraits<Operator<T, El::Base<T>, D>>
+{
+};
+} // namespace lbann
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("Abs operator lifecycle",
+                        "[mpi][operator][math][abs][lifecycle]",
+                        AllAbsOpTypes)
+{
+  using ThisOpType = TestType;
+
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr;
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{}.clone());
+    CHECK(clone_ptr->get_type() == "abs");
+
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+  }
+  SECTION("Construct from protobuf")
+  {
+    using InputDataType = InputValueType<ThisOpType>;
+    using OutputDataType = OutputValueType<ThisOpType>;
+    constexpr auto D = Device<ThisOpType>;
+
+    lbann_data::Operator proto_op;
+    ThisOpType{}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(
+      base_ptr =
+        proto::construct_operator<InputDataType, OutputDataType, D>(proto_op));
+    CHECK(base_ptr->get_type() == "abs");
+
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    CHECK(IsValidPtr(specific_ptr));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Abs operator action",
+                        "[mpi][operator][math][abs][action]",
+                        AllAbsOpTypes)
+{
+  using ThisOpType = TestType;
+  using InputDataType = InputValueType<ThisOpType>;
+  using OutputDataType = OutputValueType<ThisOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+
+  // Some common data
+  ThisOpType op;
+
+  El::Int const height = 23;
+  El::Int const width = 17;
+  InputDataParallelMatType<ThisOpType> input(height, width, g, 0),
+    grad_wrt_input(height, width, g, 0),
+    true_grad_wrt_input(height, width, g, 0);
+  OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+    grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+  SECTION("Data parallel - all values positive real")
+  {
+    // Setup inputs/outputs
+    El::Fill(input, InputDataType{2.f});
+    El::Fill(true_output, OutputDataType{2.f});
+
+    El::MakeUniform(grad_wrt_output);
+    El::Copy(grad_wrt_output, true_grad_wrt_input);
+
+    El::Fill(output, OutputDataType{-32.f});        // Fill out of range.
+    El::Fill(grad_wrt_input, InputDataType{-24.f}); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Data parallel - all values negative real")
+  {
+    // Setup inputs/outputs
+    El::Fill(input, InputDataType{-2.f});
+    El::Fill(true_output, OutputDataType{2.f});
+
+    El::MakeUniform(grad_wrt_output);
+    El::Copy(grad_wrt_output, true_grad_wrt_input);
+    El::Scale(El::To<InputDataType>(-1.), true_grad_wrt_input);
+
+    El::Fill(output, OutputDataType{-32.f});        // Fill out of range.
+    El::Fill(grad_wrt_input, InputDataType{-24.f}); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  // SECTION("Data parallel - all values out of range")
+  // {
+  //   // Setup inputs/outputs
+  //   El::MakeUniform(input, El::To<DataType>(4), El::To<DataType>(1));
+  //   El::Fill(output, El::To<DataType>(-2.0));
+  //   El::Fill(true_output, El::To<DataType>(1.0));
+
+  //   El::MakeUniform(grad_wrt_output);
+  //   El::Fill(grad_wrt_input, El::To<DataType>(-1.0));
+  //   El::Fill(true_grad_wrt_input, El::To<DataType>(0.0));
+
+  //   CHECK_FALSE(true_output == output);
+  //   REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+  //   CHECK(true_output == output);
+
+  //   REQUIRE_NOTHROW(
+  //     op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+  //   CHECK(true_grad_wrt_input == grad_wrt_input);
+  // }
+}
+
+TEMPLATE_LIST_TEST_CASE("Abs operator serialization",
+                        "[mpi][operator][math][abs][serialize]",
+                        AllAbsOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss;
+
+  // Create the objects
+  ThisOpType src_operator, tgt_operator;
+  BaseOpPtr src_operator_ptr = std::make_unique<ThisOpType>(), tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization (pointer casts are null-safe).
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(src_operator_ptr.get())));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization (pointer casts are null-safe).
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(src_operator_ptr.get())));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization (pointer casts are null-safe).
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(src_operator_ptr.get())));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization (pointer casts are null-safe).
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(src_operator_ptr.get())));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/operators/math/unit_test/add_test.cpp b/src/operators/math/unit_test/add_test.cpp
new file mode 100644
index 00000000000..ed475c93b7f
--- /dev/null
+++ b/src/operators/math/unit_test/add_test.cpp
@@ -0,0 +1,308 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/binary.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+using namespace lbann;
+
+// Define the list of operators to test. Basically this is
+// {float,double}x{CPU,GPU}.
+template <typename T>
+using AddOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  AddOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  AddOperator<T, El::Device::CPU>>;
+
+using AllAddOpTypes = h2::meta::tlist::Append<AddOperatorAllDevices<float>,
+                                              AddOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<AddOperator<T, D>> : OperatorTraits<Operator<T, T, D>>
+{
+};
+} // namespace lbann
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("Add operator lifecycle",
+                        "[mpi][operator][math][add][lifecycle]",
+                        AllAddOpTypes)
+{
+  using ThisOpType = TestType;
+
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr;
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+
+    // The freshly constructed operator reports its registered type name.
+    CHECK(op_ptr->get_type() == "add");
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{}.clone());
+
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+
+    lbann_data::Operator proto_op;
+    ThisOpType{}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    CHECK(base_ptr->get_type() == "add");
+
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    CHECK(IsValidPtr(specific_ptr));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Add operator action",
+                        "[mpi][operator][math][add][action]",
+                        AllAddOpTypes)
+{
+  using ThisOpType = TestType;
+  using InOutDataType = InputValueType<ThisOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+
+  // Some common data
+  ThisOpType op;
+
+  El::Int const height = 13;
+  El::Int const width = 17;
+
+  SECTION("Data parallel")
+  {
+    InputDataParallelMatType<ThisOpType> input0(height, width, g, 0),
+      input1(height, width, g, 0), grad_wrt_input0(height, width, g, 0),
+      grad_wrt_input1(height, width, g, 0),
+      true_grad_wrt_input0(height, width, g, 0),
+      true_grad_wrt_input1(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Setup inputs/outputs
+    El::Fill(input0, El::To<InOutDataType>(1.));
+    El::Fill(input1, El::To<InOutDataType>(2.));
+    El::Fill(true_output, El::To<InOutDataType>(3.));
+
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input0 = grad_wrt_output;
+    true_grad_wrt_input1 = grad_wrt_output;
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input0,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+    El::Fill(grad_wrt_input1,
+             El::To<InOutDataType>(-52.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input0, input1}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(op.bp_compute({input0, input1},
+                                  {grad_wrt_output},
+                                  {grad_wrt_input0, grad_wrt_input1}));
+    CHECK(true_grad_wrt_input0 == grad_wrt_input0);
+    CHECK(true_grad_wrt_input1 == grad_wrt_input1);
+  }
+
+  SECTION("Model parallel")
+  {
+    InputModelParallelMatType<ThisOpType> input0(height, width, g, 0),
+      input1(height, width, g, 0), grad_wrt_input0(height, width, g, 0),
+      grad_wrt_input1(height, width, g, 0),
+      true_grad_wrt_input0(height, width, g, 0),
+      true_grad_wrt_input1(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Setup inputs/outputs
+    El::Fill(input0, El::To<InOutDataType>(1.));
+    El::Fill(input1, El::To<InOutDataType>(2.));
+    El::Fill(true_output, El::To<InOutDataType>(3.));
+
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input0 = grad_wrt_output;
+    true_grad_wrt_input1 = grad_wrt_output;
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input0,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+    El::Fill(grad_wrt_input1,
+             El::To<InOutDataType>(-52.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input0, input1}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(op.bp_compute({input0, input1},
+                                  {grad_wrt_output},
+                                  {grad_wrt_input0, grad_wrt_input1}));
+    CHECK(true_grad_wrt_input0 == grad_wrt_input0);
+    CHECK(true_grad_wrt_input1 == grad_wrt_input1);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Add operator serialization",
+                        "[mpi][operator][math][add][serialize]",
+                        AllAddOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss;
+
+  // Create the objects
+  ThisOpType src_operator, tgt_operator;
+  BaseOpPtr src_operator_ptr = std::make_unique<ThisOpType>(), tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/operators/math/unit_test/clamp_test.cpp b/src/operators/math/unit_test/clamp_test.cpp
index 4989b92324f..8b5bb715ff0 100644
--- a/src/operators/math/unit_test/clamp_test.cpp
+++ b/src/operators/math/unit_test/clamp_test.cpp
@@ -31,6 +31,8 @@
 #include "MatrixHelpers.hpp"
 #include "TestHelpers.hpp"
 
+#include "OperatorTraits.hpp"
+
 // CUT
 #include "lbann/operators/math/clamp.hpp"
 
@@ -46,52 +48,27 @@
 #include <numeric>
 #include <operators.pb.h>
 
+using namespace lbann;
+
 // Define the list of operators to test. Basically this is
 // {float,double}x{CPU,GPU}.
 template <typename T>
 using ClampOperatorAllDevices = h2::meta::TL<
 #ifdef LBANN_HAS_GPU
-  lbann::ClampOperator<T, El::Device::GPU>,
+  ClampOperator<T, El::Device::GPU>,
 #endif // LBANN_HAS_GPU
-  lbann::ClampOperator<T, El::Device::CPU>>;
+  ClampOperator<T, El::Device::CPU>>;
 
 using AllClampOpTypes =
   h2::meta::tlist::Append<ClampOperatorAllDevices<float>,
                           ClampOperatorAllDevices<double>>;
 
-template <typename T>
-struct OperatorTraits;
-
+namespace lbann {
 template <typename T, El::Device D>
-struct OperatorTraits<lbann::ClampOperator<T, D>>
+struct OperatorTraits<ClampOperator<T, D>> : OperatorTraits<Operator<T, T, D>>
 {
-  using value_type = T;
-  using base_type = lbann::Operator<T, T, D>;
-  using data_parallel_mat_type =
-    El::DistMatrix<T, El::Dist::STAR, El::Dist::VC, El::DistWrap::ELEMENT, D>;
-  using model_parallel_mat_type =
-    El::DistMatrix<T, El::Dist::MC, El::Dist::MR, El::DistWrap::ELEMENT, D>;
-  using tensor_type = lbann::utils::DistTensorView<T, D>;
-  using const_tensor_type = lbann::utils::ConstDistTensorView<T, D>;
-  static constexpr El::Device device = D;
 };
-
-template <typename OpT>
-using ValueType = typename OperatorTraits<OpT>::value_type;
-template <typename OpT>
-using BaseOperatorType = typename OperatorTraits<OpT>::base_type;
-template <typename OpT>
-using DataParallelMatType =
-  typename OperatorTraits<OpT>::data_parallel_mat_type;
-template <typename OpT>
-using ModelParallelMatType =
-  typename OperatorTraits<OpT>::model_parallel_mat_type;
-template <typename OpT>
-constexpr auto DeviceAlloc = OperatorTraits<OpT>::device;
-template <typename OpT>
-using TensorType = typename OperatorTraits<OpT>::tensor_type;
-template <typename OpT>
-using ConstTensorType = typename OperatorTraits<OpT>::const_tensor_type;
+} // namespace lbann
 
 // Save some typing.
 using unit_test::utilities::IsValidPtr;
@@ -101,56 +78,57 @@ TEMPLATE_LIST_TEST_CASE("Clamp operator lifecycle",
                         AllClampOpTypes)
 {
   using ThisOpType = TestType;
-  using DataType = ValueType<ThisOpType>;
+  using InOutDataType = InputValueType<ThisOpType>;
 
+  auto AsOkType = [](auto const& x) { return El::To<InOutDataType>(x); };
   SECTION("Construction with valid arguments")
   {
     std::unique_ptr<ThisOpType> op_ptr = nullptr;
     REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>(0., 1.));
     REQUIRE(IsValidPtr(op_ptr));
-    CHECK(op_ptr->get_min() == El::To<DataType>(0.0));
-    CHECK(op_ptr->get_max() == El::To<DataType>(1.0));
+    CHECK(op_ptr->get_min() == AsOkType(0.));
+    CHECK(op_ptr->get_max() == AsOkType(1.));
 
     REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>(1., 1.));
     REQUIRE(IsValidPtr(op_ptr));
-    CHECK(op_ptr->get_min() == El::To<DataType>(1.0));
-    CHECK(op_ptr->get_max() == El::To<DataType>(1.0));
+    CHECK(op_ptr->get_min() == AsOkType(1.));
+    CHECK(op_ptr->get_max() == AsOkType(1.));
   }
   SECTION("Construction with invalid arguments")
   {
     std::unique_ptr<ThisOpType> op_ptr = nullptr;
-    CHECK_THROWS(op_ptr = std::make_unique<ThisOpType>(1.0, 0.0));
+    CHECK_THROWS(op_ptr = std::make_unique<ThisOpType>(1., 0.));
     CHECK_FALSE(IsValidPtr(op_ptr));
   }
   SECTION("Copy interface")
   {
     std::unique_ptr<ThisOpType> clone_ptr = nullptr;
-    REQUIRE_NOTHROW(clone_ptr = ThisOpType(1.0, 3.0).clone());
-    CHECK(clone_ptr->get_min() == El::To<DataType>(1.0));
-    CHECK(clone_ptr->get_max() == El::To<DataType>(3.0));
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{1., 3.}.clone());
+    CHECK(clone_ptr->get_min() == AsOkType(1.));
+    CHECK(clone_ptr->get_max() == AsOkType(3.));
 
-    ThisOpType op(0.0, 1.0);
+    ThisOpType op(0., 1.);
     REQUIRE_NOTHROW(op = *clone_ptr);
 
-    CHECK(op.get_min() == El::To<DataType>(1.0));
-    CHECK(op.get_max() == El::To<DataType>(3.0));
+    CHECK(op.get_min() == AsOkType(1.));
+    CHECK(op.get_max() == AsOkType(3.));
   }
   SECTION("Construct from protobuf")
   {
-    constexpr auto D = DeviceAlloc<ThisOpType>;
+    constexpr auto D = Device<ThisOpType>;
     lbann_data::Operator proto_op;
-    ThisOpType(-2.0, 5.0).write_proto(proto_op);
+    ThisOpType{-2., 5.}.write_proto(proto_op);
 
     std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
     REQUIRE_NOTHROW(
       base_ptr =
-        lbann::proto::construct_operator<DataType, DataType, D>(proto_op));
+        proto::construct_operator<InOutDataType, InOutDataType, D>(proto_op));
     CHECK(base_ptr->get_type() == "clamp");
 
     auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
-    CHECK((bool)specific_ptr);
-    CHECK(specific_ptr->get_min() == El::To<DataType>(-2.0));
-    CHECK(specific_ptr->get_max() == El::To<DataType>(5.0));
+    CHECK(IsValidPtr(specific_ptr));
+    CHECK(specific_ptr->get_min() == AsOkType(-2.));
+    CHECK(specific_ptr->get_max() == AsOkType(5.));
   }
 }
 
@@ -159,21 +137,23 @@ TEMPLATE_LIST_TEST_CASE("Clamp operator action",
                         AllClampOpTypes)
 {
   using ThisOpType = TestType;
-  using DataType = ValueType<ThisOpType>;
+  using InOutDataType = InputValueType<ThisOpType>;
 
   auto& world_comm = unit_test::utilities::current_world_comm();
   auto const& g = world_comm.get_trainer_grid();
 
   // Some common data
-  ThisOpType op(El::To<DataType>(-1.0), El::To<DataType>(1.0));
+  ThisOpType op(-1., 1.);
 
   El::Int const height = 13;
   El::Int const width = 17;
-  DataParallelMatType<ThisOpType> input(height, width, g, 0),
-    output(height, width, g, 0), grad_wrt_output(height, width, g, 0),
-    grad_wrt_input(height, width, g, 0), true_output(height, width, g, 0),
+  InputDataParallelMatType<ThisOpType> input(height, width, g, 0),
+    grad_wrt_input(height, width, g, 0),
     true_grad_wrt_input(height, width, g, 0);
+  OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+    grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
 
+  auto AsOkType = [](auto const& x) { return El::To<InOutDataType>(x); };
   SECTION("Data parallel - all values in range")
   {
     // Setup inputs/outputs
@@ -183,8 +163,8 @@ TEMPLATE_LIST_TEST_CASE("Clamp operator action",
     El::MakeUniform(grad_wrt_output);
     true_grad_wrt_input = grad_wrt_output;
 
-    El::Fill(output, El::To<DataType>(2.0));         // Fill out of range.
-    El::Fill(grad_wrt_input, El::To<DataType>(4.0)); // Fill out of range.
+    El::Fill(output, AsOkType(2.));         // Fill out of range.
+    El::Fill(grad_wrt_input, AsOkType(4.)); // Fill out of range.
 
     CHECK_FALSE(true_output == output);
     REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
@@ -198,13 +178,13 @@ TEMPLATE_LIST_TEST_CASE("Clamp operator action",
   SECTION("Data parallel - all values out of range")
   {
     // Setup inputs/outputs
-    El::MakeUniform(input, El::To<DataType>(4), El::To<DataType>(1));
-    El::Fill(output, El::To<DataType>(-2.0));
-    El::Fill(true_output, El::To<DataType>(1.0));
+    El::MakeUniform(input, AsOkType(4), AsOkType(1));
+    El::Fill(output, AsOkType(-2.));
+    El::Fill(true_output, AsOkType(1.));
 
     El::MakeUniform(grad_wrt_output);
-    El::Fill(grad_wrt_input, El::To<DataType>(-1.0));
-    El::Fill(true_grad_wrt_input, El::To<DataType>(0.0));
+    El::Fill(grad_wrt_input, AsOkType(-1.));
+    El::Fill(true_grad_wrt_input, AsOkType(0.));
 
     CHECK_FALSE(true_output == output);
     REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
@@ -228,13 +208,13 @@ TEMPLATE_LIST_TEST_CASE("Clamp operator serialization",
   // int const size_of_world = world_comm.get_procs_in_world();
 
   auto const& g = world_comm.get_trainer_grid();
-  lbann::utils::grid_manager mgr(g);
+  utils::grid_manager mgr(g);
 
   std::stringstream ss;
 
   // Create the objects
-  ThisOpType src_operator(1.f, 2.f), tgt_operator(0.f, 1.f);
-  BaseOpPtr src_operator_ptr = std::make_unique<ThisOpType>(3.f, 4.f),
+  ThisOpType src_operator(1., 2.), tgt_operator(0., 1.);
+  BaseOpPtr src_operator_ptr = std::make_unique<ThisOpType>(3., 4.),
             tgt_operator_ptr;
 
 #ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
@@ -269,13 +249,13 @@ TEMPLATE_LIST_TEST_CASE("Clamp operator serialization",
   SECTION("Rooted binary archive")
   {
     {
-      lbann::RootedBinaryOutputArchive oarchive(ss, g);
+      RootedBinaryOutputArchive oarchive(ss, g);
       REQUIRE_NOTHROW(oarchive(src_operator));
       REQUIRE_NOTHROW(oarchive(src_operator_ptr));
     }
 
     {
-      lbann::RootedBinaryInputArchive iarchive(ss, g);
+      RootedBinaryInputArchive iarchive(ss, g);
       REQUIRE_NOTHROW(iarchive(tgt_operator));
       REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
       CHECK(IsValidPtr(tgt_operator_ptr));
@@ -327,13 +307,13 @@ TEMPLATE_LIST_TEST_CASE("Clamp operator serialization",
   SECTION("Rooted XML archive")
   {
     {
-      lbann::RootedXMLOutputArchive oarchive(ss, g);
+      RootedXMLOutputArchive oarchive(ss, g);
       REQUIRE_NOTHROW(oarchive(src_operator));
       REQUIRE_NOTHROW(oarchive(src_operator_ptr));
     }
 
     {
-      lbann::RootedXMLInputArchive iarchive(ss, g);
+      RootedXMLInputArchive iarchive(ss, g);
       REQUIRE_NOTHROW(iarchive(tgt_operator));
       REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
       CHECK(IsValidPtr(tgt_operator_ptr));
diff --git a/src/operators/math/unit_test/cos_test.cpp b/src/operators/math/unit_test/cos_test.cpp
new file mode 100644
index 00000000000..ee944c96398
--- /dev/null
+++ b/src/operators/math/unit_test/cos_test.cpp
@@ -0,0 +1,321 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/unary.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <h2/meta/core/Lazy.hpp>
+#include <matrices.hpp>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+#include <math.h>
+#if defined M_PI
+#define LBANN_PI M_PI
+#else
+#define LBANN_PI 3.14159265358979323846264338327
+#endif // defined M_PI
+
+using namespace lbann;
+
+// Define the list of operators to test. Basically this is
+// {float,double}x{CPU,GPU}.
+template <typename T>
+using CosOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  CosOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  CosOperator<T, El::Device::CPU>>;
+
+using AllCosOpTypes = h2::meta::tlist::Append<CosOperatorAllDevices<float>,
+                                              CosOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<CosOperator<T, D>> : OperatorTraits<Operator<T, T, D>>
+{
+};
+} // namespace lbann
+
+template <typename CosOpT>
+struct MakeSinOpT
+{
+  using type = SinOperator<InputValueType<CosOpT>, Device<CosOpT>>;
+};
+
+template <typename CosOpT>
+using GetSinOperator = h2::meta::Force<MakeSinOpT<CosOpT>>;
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("Cos operator lifecycle",
+                        "[mpi][operator][math][cosine][lifecycle]",
+                        AllCosOpTypes)
+{
+  using ThisOpType = TestType;
+
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr;
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{}.clone());
+
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+
+    lbann_data::Operator proto_op;
+    ThisOpType{}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    CHECK(base_ptr->get_type() == "cosine");
+
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    CHECK(IsValidPtr(specific_ptr));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Cos operator action",
+                        "[mpi][operator][math][cosine][action]",
+                        AllCosOpTypes)
+{
+  using ThisOpType = TestType;
+  using InOutDataType = InputValueType<ThisOpType>;
+  using SinOpType = GetSinOperator<ThisOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+
+  // Some common data
+  ThisOpType op;
+  SinOpType sin_op;
+
+  El::Int const height = 13;
+  El::Int const width = 17;
+
+  SECTION("Data parallel")
+  {
+    // Main objects
+    InputDataParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Setup inputs/outputs: every input entry is 0, so cos(input) == 1.
+    El::Zero(input);
+    El::Fill(true_output, El::To<InOutDataType>(1.));
+
+    El::MakeUniform(grad_wrt_output);
+
+    // True gradient wrt input: -sin(input) * grad_wrt_output (entrywise).
+    sin_op.fp_compute({input}, {output});
+    El::Hadamard(grad_wrt_output, output, true_grad_wrt_input);
+    El::Scale(-1., true_grad_wrt_input);
+
+    // Overwrite the buffers under test so stale values cannot pass.
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Model parallel")
+  {
+    InputModelParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Setup inputs/outputs
+    El::Fill(input, El::To<InOutDataType>(LBANN_PI));
+    El::Fill(true_output, El::To<InOutDataType>(-1.));
+
+    El::MakeUniform(grad_wrt_output);
+
+    sin_op.fp_compute({input}, {output});
+    El::Hadamard(grad_wrt_output, output, true_grad_wrt_input);
+    El::Scale(-1., true_grad_wrt_input);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Cos operator serialization",
+                        "[mpi][operator][math][cosine][serialize]",
+                        AllCosOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss;
+
+  // Create the objects
+  ThisOpType src_operator, tgt_operator;
+  BaseOpPtr src_operator_ptr = std::make_unique<ThisOpType>(), tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/operators/math/unit_test/multiply_test.cpp b/src/operators/math/unit_test/multiply_test.cpp
new file mode 100644
index 00000000000..2a71cd47666
--- /dev/null
+++ b/src/operators/math/unit_test/multiply_test.cpp
@@ -0,0 +1,310 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/binary.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+using namespace lbann;
+
+// Define the list of operators to test. Basically this is
+// {float,double}x{CPU,GPU}.
+template <typename T>
+using MultiplyOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  MultiplyOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  MultiplyOperator<T, El::Device::CPU>>;
+
+using AllMultiplyOpTypes =
+  h2::meta::tlist::Append<MultiplyOperatorAllDevices<float>,
+                          MultiplyOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<MultiplyOperator<T, D>>
+  : OperatorTraits<Operator<T, T, D>>
+{
+};
+} // namespace lbann
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("Multiply operator lifecycle",
+                        "[mpi][operator][math][multiply][lifecycle]",
+                        AllMultiplyOpTypes)
+{
+  using ThisOpType = TestType;
+
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr;
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{}.clone());
+
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+
+    lbann_data::Operator proto_op;
+    ThisOpType{}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    CHECK(base_ptr->get_type() == "multiply");
+
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    CHECK(IsValidPtr(specific_ptr));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Multiply operator action",
+                        "[mpi][operator][math][multiply][action]",
+                        AllMultiplyOpTypes)
+{
+  using ThisOpType = TestType;
+  using InOutDataType = InputValueType<ThisOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+
+  // Some common data
+  ThisOpType op;
+
+  El::Int const height = 13;
+  El::Int const width = 17;
+
+  SECTION("Data parallel")
+  {
+    InputDataParallelMatType<ThisOpType> input0(height, width, g, 0),
+      input1(height, width, g, 0), grad_wrt_input0(height, width, g, 0),
+      grad_wrt_input1(height, width, g, 0),
+      true_grad_wrt_input0(height, width, g, 0),
+      true_grad_wrt_input1(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Constant inputs and the expected entrywise product (5 * 2 == 10).
+    El::Fill(input0, El::To<InOutDataType>(5.));
+    El::Fill(input1, El::To<InOutDataType>(2.));
+    El::Fill(true_output, El::To<InOutDataType>(10.));
+
+    El::MakeUniform(grad_wrt_output);
+    El::Hadamard(input1, grad_wrt_output, true_grad_wrt_input0); // x1 * dy
+    El::Hadamard(input0, grad_wrt_output, true_grad_wrt_input1); // x0 * dy
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Sentinel value.
+    El::Fill(grad_wrt_input0,
+             El::To<InOutDataType>(-42.)); // Sentinel value.
+    El::Fill(grad_wrt_input1,
+             El::To<InOutDataType>(-52.)); // Sentinel value.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input0, input1}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(op.bp_compute({input0, input1},
+                                  {grad_wrt_output},
+                                  {grad_wrt_input0, grad_wrt_input1}));
+    CHECK(true_grad_wrt_input0 == grad_wrt_input0);
+    CHECK(true_grad_wrt_input1 == grad_wrt_input1);
+  }
+
+  SECTION("Model parallel")
+  {
+    InputModelParallelMatType<ThisOpType> input0(height, width, g, 0),
+      input1(height, width, g, 0), grad_wrt_input0(height, width, g, 0),
+      grad_wrt_input1(height, width, g, 0),
+      true_grad_wrt_input0(height, width, g, 0),
+      true_grad_wrt_input1(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Constant inputs and the expected entrywise product (3 * 2 == 6).
+    El::Fill(input0, El::To<InOutDataType>(3.));
+    El::Fill(input1, El::To<InOutDataType>(2.));
+    El::Fill(true_output, El::To<InOutDataType>(6.));
+
+    El::MakeUniform(grad_wrt_output);
+    El::Hadamard(input1, grad_wrt_output, true_grad_wrt_input0); // x1 * dy
+    El::Hadamard(input0, grad_wrt_output, true_grad_wrt_input1); // x0 * dy
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Sentinel value.
+    El::Fill(grad_wrt_input0,
+             El::To<InOutDataType>(-42.)); // Sentinel value.
+    El::Fill(grad_wrt_input1,
+             El::To<InOutDataType>(-52.)); // Sentinel value.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input0, input1}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(op.bp_compute({input0, input1},
+                                  {grad_wrt_output},
+                                  {grad_wrt_input0, grad_wrt_input1}));
+    CHECK(true_grad_wrt_input0 == grad_wrt_input0);
+    CHECK(true_grad_wrt_input1 == grad_wrt_input1);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Multiply operator serialization",
+                        "[mpi][operator][math][multiply][serialize]",
+                        AllMultiplyOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss;
+
+  // Create the objects
+  ThisOpType src_operator, tgt_operator;
+  BaseOpPtr src_operator_ptr = std::make_unique<ThisOpType>(), tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/operators/math/unit_test/sin_test.cpp b/src/operators/math/unit_test/sin_test.cpp
new file mode 100644
index 00000000000..a9ff188a67f
--- /dev/null
+++ b/src/operators/math/unit_test/sin_test.cpp
@@ -0,0 +1,319 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/unary.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <h2/meta/core/Lazy.hpp>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+#include <math.h>
+#if defined M_PI_2
+#define LBANN_PI_2 M_PI_2
+#else
+#define LBANN_PI 3.14159265358979323846264338327
+#define LBANN_PI_2 (LBANN_PI / 2.0) // parenthesized: safe in any expression
+#endif // defined M_PI_2
+
+using namespace lbann;
+
+// Define the list of operators to test. Basically this is
+// {float,double}x{CPU,GPU}.
+template <typename T>
+using SinOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  SinOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  SinOperator<T, El::Device::CPU>>;
+
+using AllSinOpTypes = h2::meta::tlist::Append<SinOperatorAllDevices<float>,
+                                              SinOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<SinOperator<T, D>> : OperatorTraits<Operator<T, T, D>>
+{
+};
+} // namespace lbann
+
+template <typename SinOpT>
+struct MakeCosOpT
+{
+  using type = CosOperator<InputValueType<SinOpT>, Device<SinOpT>>;
+};
+
+template <typename SinOpT>
+using GetCosOperator = h2::meta::Force<MakeCosOpT<SinOpT>>;
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("Sin operator lifecycle",
+                        "[mpi][operator][math][sine][lifecycle]",
+                        AllSinOpTypes)
+{
+  using ThisOpType = TestType;
+
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr;
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{}.clone());
+
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+
+    lbann_data::Operator proto_op;
+    ThisOpType{}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    CHECK(base_ptr->get_type() == "sine");
+
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    CHECK(IsValidPtr(specific_ptr));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Sin operator action",
+                        "[mpi][operator][math][sine][action]",
+                        AllSinOpTypes)
+{
+  using ThisOpType = TestType;
+  using InOutDataType = InputValueType<ThisOpType>;
+  using CosOpType = GetCosOperator<ThisOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+
+  // Some common data
+  ThisOpType op;
+  CosOpType cos_op;
+
+  El::Int const height = 13;
+  El::Int const width = 17;
+
+  SECTION("Data parallel")
+  {
+    // Main objects
+    InputDataParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Setup inputs/outputs; LBANN_PI_2 (not M_PI_2) keeps this portable.
+    El::Fill(input, El::To<InOutDataType>(LBANN_PI_2));
+    El::Fill(true_output, El::To<InOutDataType>(1.));
+
+    El::MakeUniform(grad_wrt_output);
+
+    // True gradient wrt input: cos(input) * grad_wrt_output (entrywise).
+    cos_op.fp_compute({input}, {output});
+    El::Hadamard(grad_wrt_output, output, true_grad_wrt_input);
+
+    // Overwrite the buffers under test so stale values cannot pass.
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Model parallel")
+  {
+    InputModelParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Setup inputs/outputs
+    El::Fill(input, El::To<InOutDataType>(3. * LBANN_PI_2));
+    El::Fill(true_output, El::To<InOutDataType>(-1.));
+
+    El::MakeUniform(grad_wrt_output);
+
+    cos_op.fp_compute({input}, {output});
+    El::Hadamard(grad_wrt_output, output, true_grad_wrt_input);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Sin operator serialization",
+                        "[mpi][operator][math][sine][serialize]",
+                        AllSinOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss;
+
+  // Create the objects
+  ThisOpType src_operator, tgt_operator;
+  BaseOpPtr src_operator_ptr = std::make_unique<ThisOpType>(), tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/operators/math/unit_test/subtract_test.cpp b/src/operators/math/unit_test/subtract_test.cpp
new file mode 100644
index 00000000000..bc83fae3e3d
--- /dev/null
+++ b/src/operators/math/unit_test/subtract_test.cpp
@@ -0,0 +1,312 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/binary.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+using namespace lbann;
+
+// Define the list of operators to test: {float,double} x {CPU,GPU},
+// where the GPU variants are included only if LBANN_HAS_GPU is defined.
+template <typename T>
+using SubtractOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  SubtractOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  SubtractOperator<T, El::Device::CPU>>;
+
+using AllSubtractOpTypes =
+  h2::meta::tlist::Append<SubtractOperatorAllDevices<float>,
+                          SubtractOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<SubtractOperator<T, D>>
+  : OperatorTraits<Operator<T, T, D>>
+{
+};
+} // namespace lbann
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("Subtract operator lifecycle",
+                        "[mpi][operator][math][subtract][lifecycle]",
+                        AllSubtractOpTypes)
+{
+  using ThisOpType = TestType;
+
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr;
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{}.clone());
+
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+
+    lbann_data::Operator proto_op;
+    ThisOpType{}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    CHECK(base_ptr->get_type() == "subtract");
+
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    CHECK(IsValidPtr(specific_ptr));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Subtract operator action",
+                        "[mpi][operator][math][subtract][action]",
+                        AllSubtractOpTypes)
+{
+  using ThisOpType = TestType;
+  using InOutDataType = InputValueType<ThisOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+
+  // Some common data
+  ThisOpType op;
+
+  El::Int const height = 13;
+  El::Int const width = 17;
+
+  SECTION("Data parallel")
+  {
+    InputDataParallelMatType<ThisOpType> input0(height, width, g, 0),
+      input1(height, width, g, 0), grad_wrt_input0(height, width, g, 0),
+      grad_wrt_input1(height, width, g, 0),
+      true_grad_wrt_input0(height, width, g, 0),
+      true_grad_wrt_input1(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Setup inputs/outputs
+    El::Fill(input0, El::To<InOutDataType>(5.));
+    El::Fill(input1, El::To<InOutDataType>(2.));
+    El::Fill(true_output, El::To<InOutDataType>(3.));
+
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input0 = grad_wrt_output;
+    true_grad_wrt_input1 = grad_wrt_output;
+    El::Scale(El::To<InOutDataType>(-1.), true_grad_wrt_input1);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input0,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+    El::Fill(grad_wrt_input1,
+             El::To<InOutDataType>(-52.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input0, input1}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(op.bp_compute({input0, input1},
+                                  {grad_wrt_output},
+                                  {grad_wrt_input0, grad_wrt_input1}));
+    CHECK(true_grad_wrt_input0 == grad_wrt_input0);
+    CHECK(true_grad_wrt_input1 == grad_wrt_input1);
+  }
+
+  SECTION("Model parallel")
+  {
+    InputModelParallelMatType<ThisOpType> input0(height, width, g, 0),
+      input1(height, width, g, 0), grad_wrt_input0(height, width, g, 0),
+      grad_wrt_input1(height, width, g, 0),
+      true_grad_wrt_input0(height, width, g, 0),
+      true_grad_wrt_input1(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Setup inputs/outputs
+    El::Fill(input0, El::To<InOutDataType>(3.));
+    El::Fill(input1, El::To<InOutDataType>(2.));
+    El::Fill(true_output, El::To<InOutDataType>(1.));
+
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input0 = grad_wrt_output;
+    true_grad_wrt_input1 = grad_wrt_output;
+    El::Scale(El::To<InOutDataType>(-1.), true_grad_wrt_input1);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input0,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+    El::Fill(grad_wrt_input1,
+             El::To<InOutDataType>(-52.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input0, input1}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(op.bp_compute({input0, input1},
+                                  {grad_wrt_output},
+                                  {grad_wrt_input0, grad_wrt_input1}));
+    CHECK(true_grad_wrt_input0 == grad_wrt_input0);
+    CHECK(true_grad_wrt_input1 == grad_wrt_input1);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Subtract operator serialization",
+                        "[mpi][operator][math][subtract][serialize]",
+                        AllSubtractOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss;
+
+  // Create the objects
+  ThisOpType src_operator, tgt_operator;
+  BaseOpPtr src_operator_ptr = std::make_unique<ThisOpType>(), tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    CHECK(IsValidPtr(dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get())));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/proto/datatype.proto b/src/proto/datatype.proto
index bd98fcc8986..bb80585fa89 100644
--- a/src/proto/datatype.proto
+++ b/src/proto/datatype.proto
@@ -32,6 +32,8 @@ enum DataType {
   FLOAT = 0;
   DOUBLE = 1;
   FP16 = 2;
+  COMPLEX_FLOAT = 3;
+  COMPLEX_DOUBLE = 4;
 }
 
 enum DeviceAllocation {
diff --git a/src/proto/factories/layer_factory.cpp b/src/proto/factories/layer_factory.cpp
index 1757e14daa6..25ce063c3d7 100644
--- a/src/proto/factories/layer_factory.cpp
+++ b/src/proto/factories/layer_factory.cpp
@@ -31,7 +31,6 @@
 
 #include "lbann/layers/layer.hpp"
 #include "lbann/layers/operator_layer.hpp"
-#include "lbann/layers/activations/activations.hpp"
 #include "lbann/layers/activations/elu.hpp"
 #include "lbann/layers/activations/identity.hpp"
 #include "lbann/layers/activations/leaky_relu.hpp"
@@ -53,7 +52,6 @@
 #include "lbann/layers/learning/gru.hpp"
 #include "lbann/layers/loss/categorical_accuracy.hpp"
 #include "lbann/layers/loss/cross_entropy.hpp"
-#include "lbann/layers/loss/entrywise.hpp"
 #include "lbann/layers/loss/l1_norm.hpp"
 #include "lbann/layers/loss/l2_norm2.hpp"
 #include "lbann/layers/loss/mean_absolute_error.hpp"
@@ -192,56 +190,7 @@ class factory_manager
     LBANN_REGISTER_BUILDER(GRU, gru);
 
     // Math layers
-    LBANN_REGISTER_BUILDER(Abs, abs);
-    LBANN_REGISTER_BUILDER(Acos, acos);
-    LBANN_REGISTER_BUILDER(Acosh, acosh);
-    LBANN_REGISTER_BUILDER(Add, add);
-    LBANN_REGISTER_BUILDER(Asin, asin);
-    LBANN_REGISTER_BUILDER(Asinh, asinh);
-    LBANN_REGISTER_BUILDER(Atan, atan);
-    LBANN_REGISTER_BUILDER(Atanh, atanh);
-    LBANN_REGISTER_BUILDER(Ceil, ceil);
-    LBANN_REGISTER_BUILDER(Cos, cos);
-    LBANN_REGISTER_BUILDER(Cosh, cosh);
-    LBANN_REGISTER_BUILDER(Divide, divide);
-    LBANN_REGISTER_BUILDER(Equal, equal);
-    LBANN_REGISTER_BUILDER(Exp, exp);
-    LBANN_REGISTER_BUILDER(Expm1, expm1);
-    LBANN_REGISTER_BUILDER(Floor, floor);
-    LBANN_REGISTER_BUILDER(Greater, greater);
-    LBANN_REGISTER_BUILDER(GreaterEqual, greater_equal);
-    LBANN_REGISTER_BUILDER(Erf, erf);
-    LBANN_REGISTER_BUILDER(ErfInv, erfinv);
-    LBANN_REGISTER_BUILDER(Less, less);
-    LBANN_REGISTER_BUILDER(LessEqual, less_equal);
-    LBANN_REGISTER_BUILDER(Log, log);
-    LBANN_REGISTER_BUILDER(Log1p, log1p);
-    LBANN_REGISTER_BUILDER(LogicalAnd, logical_and);
-    LBANN_REGISTER_BUILDER(LogicalNot, logical_not);
-    LBANN_REGISTER_BUILDER(LogicalOr, logical_or);
-    LBANN_REGISTER_BUILDER(LogicalXor, logical_xor);
     LBANN_REGISTER_BUILDER(MatMul, matmul);
-    LBANN_REGISTER_BUILDER(Max, max);
-    LBANN_REGISTER_BUILDER(Min, min);
-    LBANN_REGISTER_BUILDER(Mod, mod);
-    LBANN_REGISTER_BUILDER(Multiply, multiply);
-    LBANN_REGISTER_BUILDER(Negative, negative);
-    LBANN_REGISTER_BUILDER(NotEqual, not_equal);
-    LBANN_REGISTER_BUILDER(Pow, pow);
-    LBANN_REGISTER_BUILDER(Reciprocal, reciprocal);
-    LBANN_REGISTER_BUILDER(Round, round);
-    LBANN_REGISTER_BUILDER(Rsqrt, rsqrt);
-    LBANN_REGISTER_BUILDER(SafeDivide, safe_divide);
-    LBANN_REGISTER_BUILDER(SafeReciprocal, safe_reciprocal);
-    LBANN_REGISTER_BUILDER(Sign, sign);
-    LBANN_REGISTER_BUILDER(Sin, sin);
-    LBANN_REGISTER_BUILDER(Sinh, sinh);
-    LBANN_REGISTER_BUILDER(Sqrt, sqrt);
-    LBANN_REGISTER_BUILDER(Square, square);
-    LBANN_REGISTER_BUILDER(SquaredDifference, squared_difference);
-    LBANN_REGISTER_BUILDER(Subtract, subtract);
-    LBANN_REGISTER_BUILDER(Tan, tan);
-    LBANN_REGISTER_BUILDER(Tanh, tanh);
 
     // Transform layers
     LBANN_REGISTER_BUILDER(BatchwiseReduceSum, batchwise_reduce_sum);
@@ -267,26 +216,16 @@ class factory_manager
 
     // Activations
     LBANN_REGISTER_DEFAULT_BUILDER(Identity, identity);
-    LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(LogSigmoid, log_sigmoid);
     LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(LogSoftmax, log_softmax);
     LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(Relu, relu);
-    LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(Selu, selu);
-    LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(Sigmoid, sigmoid);
     LBANN_REGISTER_BUILDER(Softmax, softmax);
-    LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(Softplus, softplus);
-    LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(Softsign, softsign);
 
     // Loss Layers
-    LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(BinaryCrossEntropy, binary_cross_entropy);
-    LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(BooleanAccuracy, boolean_accuracy);
-    LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(BooleanFalseNegative, boolean_false_negative);
-    LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(BooleanFalsePositive, boolean_false_positive);
     LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(CategoricalAccuracy, categorical_accuracy);
     LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(L1Norm, l1_norm);
     LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(L2Norm2, l2_norm2);
     LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(MeanAbsoluteError, mean_absolute_error);
     LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(MeanSquaredError, mean_squared_error);
-    LBANN_REGISTER_DEFAULT_BUILDER_WITH_COMM(SigmoidBinaryCrossEntropy, sigmoid_binary_cross_entropy);
 
     // Regularizer layers
     LBANN_REGISTER_BUILDER(Dropout, dropout);
@@ -661,7 +600,7 @@ std::unique_ptr<Layer> construct_layer_legacy(
                   "a data-parallel layout and on CPU");
     }
   }
-  
+
   if (proto_layer.has_rotation()) {
     if (Layout == data_layout::DATA_PARALLEL && Device == El::Device::CPU) {
       return lbann::make_unique<rotation_layer<TensorDataType, data_layout::DATA_PARALLEL, El::Device::CPU>>(comm);
diff --git a/src/proto/layers.proto b/src/proto/layers.proto
index 79a995998fe..311a2f38499 100644
--- a/src/proto/layers.proto
+++ b/src/proto/layers.proto
@@ -118,62 +118,8 @@ message Layer {
     TopKCategoricalAccuracy top_k_categorical_accuracy = 64;
     L2Norm2 l2_norm2 = 65;
     L1Norm l1_norm = 66;
-    BinaryCrossEntropy binary_cross_entropy = 67;
-    SigmoidBinaryCrossEntropy sigmoid_binary_cross_entropy = 68;
-    BooleanAccuracy boolean_accuracy = 69;
-    BooleanFalseNegative boolean_false_negative = 70;
-    BooleanFalsePositive boolean_false_positive = 71;
 
     // Math layers
-    LogicalNot logical_not = 401;
-    Abs abs = 402;
-    Negative negative = 403;
-    Sign sign = 404;
-    Round round = 405;
-    Ceil ceil = 406;
-    Floor floor = 407;
-    Reciprocal reciprocal = 408;
-    Square square = 409;
-    Sqrt sqrt = 410;
-    Rsqrt rsqrt = 411;
-    SafeReciprocal safe_reciprocal = 412;
-    Exp exp = 413;
-    Expm1 expm1 = 414;
-    Log log = 415;
-    Log1p log1p = 416;
-    Cos cos = 417;
-    Sin sin = 418;
-    Tan tan = 419;
-    Acos acos = 420;
-    Asin asin = 421;
-    Atan atan = 422;
-    Cosh cosh = 423;
-    Sinh sinh = 424;
-    Tanh tanh = 425;
-    Acosh acosh = 426;
-    Asinh asinh = 427;
-    Atanh atanh = 428;
-    Erf erf = 429;
-    ErfInv erfinv = 430;
-    Add add = 450;
-    Subtract subtract = 451;
-    Multiply multiply = 452;
-    Divide divide = 453;
-    Mod mod = 454;
-    Pow pow = 455;
-    SafeDivide safe_divide = 456;
-    SquaredDifference squared_difference = 457;
-    Max max = 458;
-    Min min = 459;
-    Equal equal = 460;
-    NotEqual not_equal = 461;
-    Less less = 462;
-    LessEqual less_equal = 463;
-    Greater greater = 464;
-    GreaterEqual greater_equal = 465;
-    LogicalAnd logical_and = 466;
-    LogicalOr logical_or = 467;
-    LogicalXor logical_xor = 468;
     MatMul matmul = 470;
     DFTAbs dft_abs = 471;
 
@@ -190,14 +136,9 @@ message Layer {
     Elu elu = 200;
     Identity identity = 201;
     LeakyRelu leaky_relu = 202;
-    LogSigmoid log_sigmoid = 203;
     LogSoftmax log_softmax = 204;
     Relu relu = 205;
-    Selu selu = 206;
-    Sigmoid sigmoid = 207;
     Softmax softmax = 208;
-    Softplus softplus = 209;
-    Softsign softsign = 210;
 
     // Image layers
     BilinearResize bilinear_resize = 500;
@@ -230,55 +171,6 @@ message Layer {
   ///////////////////////
   // Math layers       //
   ///////////////////////
-  message LogicalNot {}
-  message Abs {}
-  message Negative {}
-  message Sign {}
-  message Round {}
-  message Ceil {}
-  message Floor {}
-  message Reciprocal {}
-  message Square {}
-  message Sqrt {}
-  message Rsqrt {}
-  message SafeReciprocal {}
-  message Exp {}
-  message Expm1 {}
-  message Log {}
-  message Log1p {}
-  message Cos {}
-  message Sin {}
-  message Tan {}
-  message Acos {}
-  message Asin {}
-  message Atan {}
-  message Cosh {}
-  message Sinh {}
-  message Tanh {}
-  message Acosh {}
-  message Asinh {}
-  message Atanh {}
-  message Erf {}
-  message ErfInv {}
-  message Add {}
-  message Subtract {}
-  message Multiply {}
-  message Divide {}
-  message Mod {}
-  message Pow {}
-  message SafeDivide {}
-  message SquaredDifference {}
-  message Max {}
-  message Min {}
-  message Equal {}
-  message NotEqual {}
-  message Less {}
-  message LessEqual {}
-  message Greater {}
-  message GreaterEqual {}
-  message LogicalAnd {}
-  message LogicalOr {}
-  message LogicalXor {}
   message DFTAbs {}
 
   /** @brief Matrix multiplication.
@@ -308,7 +200,6 @@ message Layer {
   message LeakyRelu {
     double negative_slope = 1; //default: 0.01
   }
-  message LogSigmoid {}
 
   /** @brief Logarithm of softmax function.
    *
@@ -317,8 +208,6 @@ message Layer {
   message LogSoftmax {}
 
   message Relu {}
-  message Selu {}
-  message Sigmoid {}
 
   /**
    *  @f[ \text{softmax}(x)_i = \frac{e^{x_i}}{\sum_j e^{x_j}} @f]
@@ -327,9 +216,6 @@ message Layer {
     string softmax_mode = 1; // default: "instance"; should be "instance" or "channel"
   }
 
-  message Softplus {}
-  message Softsign {}
-
   ///////////////////////
   // Loss layers //
   ///////////////////////
@@ -344,11 +230,6 @@ message Layer {
   }
   message L2Norm2 {}
   message L1Norm {}
-  message BinaryCrossEntropy {}
-  message SigmoidBinaryCrossEntropy {}
-  message BooleanAccuracy {}
-  message BooleanFalseNegative {}
-  message BooleanFalsePositive {}
 
   ///////////////////////////
   // Regularization layers //
@@ -576,14 +457,14 @@ message Layer {
     *
     *  The first input tensor is the values and the second is the
     *  indices. For the 1D case the inputs must have the same dimensions.
-    *  For the 2D case, the inputs must have either the same number of 
+    *  For the 2D case, the inputs must have either the same number of
     *  columns (axis=0) or the same number of rows (axis=1). If
     *  an index is out-of-range, it is ignored.
     *
-    *  @note: In the 2D case the output dimensions change depending on the 
-    *  axis. For an (m,n) values input, the output dims are expected to 
-    *  be (*,n) for Axis == 0 and (m,*) for Axis == 1. 
-    *   
+    *  @note: In the 2D case the output dimensions change depending on the
+    *  axis. For an (m,n) values input, the output dims are expected to
+    *  be (*,n) for Axis == 0 and (m,*) for Axis == 1.
+    *
     *  @todo Only flat tensors are currently supported. For higher-order
     *  tensors, PyTorch
     *  (https://pytorch.org/docs/master/tensors.html#torch.Tensor.scatter_)
@@ -619,10 +500,10 @@ message Layer {
     *  the index tensor. If an index is out-of-range, the corresponding
     *  output is set to zero.
     *
-    *  @note: In the 2D case, the output dimension changes according 
-    *  to the axis. For an (m,n) values input, and a (k) indices input, 
+    *  @note: In the 2D case, the output dimension changes according
+    *  to the axis. For an (m,n) values input, and a (k) indices input,
     *  the output tensor will be (k,n) for Axis == 0 and (m, k) for
-    *  Axis == 1  
+    *  Axis == 1
     */
   message Gather {
 
@@ -818,8 +699,8 @@ message Layer {
   /** @brief Rotate a image clockwise around its center, then shear , then translate
    *
    *  Expects 4 inputs: a 3D image tensor in CHW format, a scalar
-   *  rotation angle, a tensor for (X,Y) shear factor, a tensor 
-   *  for (X,Y) translate. 
+   *  rotation angle, a tensor for (X,Y) shear factor, a tensor
+   *  for (X,Y) translate.
    */
   message CompositeImageTransformation {}
 
diff --git a/src/proto/operators.proto b/src/proto/operators.proto
index fe70c262795..03e80ed0c40 100644
--- a/src/proto/operators.proto
+++ b/src/proto/operators.proto
@@ -40,7 +40,105 @@ message Operator {
   google.protobuf.Any parameters = 4;
 }
 
+/// @name Unary math operators.
+/// @{
+
 message ClampOperator {
   double min = 1;
   double max = 2;
 }
+
+message LogicalNotOperator {}
+message AbsOperator {}
+message NegativeOperator {}
+message SignOperator {}
+message RoundOperator {}
+message CeilOperator {}
+message FloorOperator {}
+message ReciprocalOperator {}
+message SquareOperator {}
+message SqrtOperator {}
+message RsqrtOperator {}
+message SafeReciprocalOperator {}
+message ExpOperator {}
+message Expm1Operator {}
+message LogOperator {}
+message Log1pOperator {}
+message CosOperator {}
+message SinOperator {}
+message TanOperator {}
+message AcosOperator {}
+message AsinOperator {}
+message AtanOperator {}
+message CoshOperator {}
+message SinhOperator {}
+message TanhOperator {}
+message AcoshOperator {}
+message AsinhOperator {}
+message AtanhOperator {}
+message ErfOperator {}
+message ErfInvOperator {}
+
+/// @}
+/// @name Binary math operators.
+/// @{
+
+message AddOperator {}
+message SubtractOperator {}
+message MultiplyOperator {}
+message DivideOperator {}
+message ModOperator {}
+message PowOperator {}
+message SafeDivideOperator {}
+message SquaredDifferenceOperator {}
+message MaxOperator {}
+message MinOperator {}
+message EqualOperator {}
+message NotEqualOperator {}
+message LessOperator {}
+message LessEqualOperator {}
+message GreaterOperator {}
+message GreaterEqualOperator {}
+message LogicalAndOperator {}
+message LogicalOrOperator {}
+message LogicalXorOperator {}
+
+/// @}
+/** @name Activation Operators */
+/// @{
+message LogSigmoidOperator {}
+
+/** @brief Logarithm of softmax function.
+ *
+ *  @f[ \log \text{softmax}(x)_i = x_i - \log \sum_j e^{x_j} @f]
+ */
+message LogSoftmaxOperator {}
+
+message SeluOperator {}
+message SigmoidOperator {}
+
+message SoftplusOperator {}
+message SoftsignOperator {}
+
+// message ReluOperator {}
+
+// /**
+//  *  @f[ \text{softmax}(x)_i = \frac{e^{x_i}}{\sum_j e^{x_j}} @f]
+//  */
+// message SoftmaxOperator {
+//   enum SoftmaxMode {
+//     INSTANCE = 0;
+//     CHANNEL = 1;
+//   }
+//   SoftmaxMode softmax_mode = 1;
+// }
+
+///@}
+/** @name Loss operators */
+///@{
+message BinaryCrossEntropyOperator {}
+message SigmoidBinaryCrossEntropyOperator {}
+message BooleanAccuracyOperator {}
+message BooleanFalseNegativeOperator {}
+message BooleanFalsePositiveOperator {}
+///@}
\ No newline at end of file
diff --git a/unit_test/utilities/TestHelpers.hpp b/unit_test/utilities/TestHelpers.hpp
index e9c17d8b250..1035e882c5e 100644
--- a/unit_test/utilities/TestHelpers.hpp
+++ b/unit_test/utilities/TestHelpers.hpp
@@ -39,6 +39,12 @@ bool IsValidPtr(std::unique_ptr<T> const& ptr) noexcept
   return static_cast<bool>(ptr);
 }
 
+template <typename T>
+bool IsValidPtr(T const* ptr) noexcept
+{
+  return static_cast<bool>(ptr);
+}
+
 } // namespace utilities
 } // namespace unit_test
 #endif // LBANN_UNIT_TEST_UTILITIES_TEST_HELPERS_HPP_INCLUDED

From 59d6588d49a0a7be6b5c1583de8adf9bb9157bf0 Mon Sep 17 00:00:00 2001
From: Tom Benson <30674819+benson31@users.noreply.github.com>
Date: Mon, 13 Sep 2021 13:43:02 -0400
Subject: [PATCH 02/37] Add a bunch of f(x,C)-type operators (#1963)

* Add a bunch of f(x,C)-type operators

* Add GPU implementations of the binary with constant operators
---
 .../operators/math/binary_with_constant.hpp   | 147 +++++
 .../lbann/operators/math/math_builders.hpp    |  14 +-
 .../operators/math/math_builders_impl.hpp     |  29 +
 include/lbann/proto/operator_factory_impl.hpp |  12 +
 src/operators/math/CMakeLists.txt             |   2 +
 src/operators/math/binary_with_constant.cpp   | 378 +++++++++++++
 src/operators/math/binary_with_constant.cu    | 507 ++++++++++++++++++
 .../math/cereal_registration/CMakeLists.txt   |  12 +
 .../math/cereal_registration/add_constant.cpp |  31 ++
 .../cereal_registration/constant_subtract.cpp |  31 ++
 .../cereal_registration/equal_constant.cpp    |  31 ++
 .../cereal_registration/greater_constant.cpp  |  31 ++
 .../greater_equal_constant.cpp                |  31 ++
 .../cereal_registration/less_constant.cpp     |  31 ++
 .../less_equal_constant.cpp                   |  31 ++
 .../math/cereal_registration/max_constant.cpp |  31 ++
 .../math/cereal_registration/min_constant.cpp |  31 ++
 .../not_equal_constant.cpp                    |  31 ++
 .../math/cereal_registration/scale.cpp        |  31 ++
 .../cereal_registration/subtract_constant.cpp |  31 ++
 src/operators/math/math_builders.cpp          |  12 +
 src/operators/math/unit_test/CMakeLists.txt   |   6 +
 .../math/unit_test/add_constant_test.cpp      | 318 +++++++++++
 .../math/unit_test/constant_subtract_test.cpp | 320 +++++++++++
 .../math/unit_test/equal_constant_test.cpp    | 379 +++++++++++++
 .../unit_test/not_equal_constant_test.cpp     | 379 +++++++++++++
 src/operators/math/unit_test/scale_test.cpp   | 319 +++++++++++
 .../math/unit_test/subtract_constant_test.cpp | 318 +++++++++++
 src/proto/operators.proto                     |  40 ++
 29 files changed, 3563 insertions(+), 1 deletion(-)
 create mode 100644 include/lbann/operators/math/binary_with_constant.hpp
 create mode 100644 src/operators/math/binary_with_constant.cpp
 create mode 100644 src/operators/math/binary_with_constant.cu
 create mode 100644 src/operators/math/cereal_registration/add_constant.cpp
 create mode 100644 src/operators/math/cereal_registration/constant_subtract.cpp
 create mode 100644 src/operators/math/cereal_registration/equal_constant.cpp
 create mode 100644 src/operators/math/cereal_registration/greater_constant.cpp
 create mode 100644 src/operators/math/cereal_registration/greater_equal_constant.cpp
 create mode 100644 src/operators/math/cereal_registration/less_constant.cpp
 create mode 100644 src/operators/math/cereal_registration/less_equal_constant.cpp
 create mode 100644 src/operators/math/cereal_registration/max_constant.cpp
 create mode 100644 src/operators/math/cereal_registration/min_constant.cpp
 create mode 100644 src/operators/math/cereal_registration/not_equal_constant.cpp
 create mode 100644 src/operators/math/cereal_registration/scale.cpp
 create mode 100644 src/operators/math/cereal_registration/subtract_constant.cpp
 create mode 100644 src/operators/math/unit_test/add_constant_test.cpp
 create mode 100644 src/operators/math/unit_test/constant_subtract_test.cpp
 create mode 100644 src/operators/math/unit_test/equal_constant_test.cpp
 create mode 100644 src/operators/math/unit_test/not_equal_constant_test.cpp
 create mode 100644 src/operators/math/unit_test/scale_test.cpp
 create mode 100644 src/operators/math/unit_test/subtract_constant_test.cpp

diff --git a/include/lbann/operators/math/binary_with_constant.hpp b/include/lbann/operators/math/binary_with_constant.hpp
new file mode 100644
index 00000000000..995c8ffaa1a
--- /dev/null
+++ b/include/lbann/operators/math/binary_with_constant.hpp
@@ -0,0 +1,147 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef LBANN_INCLUDE_LBANN_OPERATORS_BINARY_WITH_CONSTANT_HPP_INCLUDED
+#define LBANN_INCLUDE_LBANN_OPERATORS_BINARY_WITH_CONSTANT_HPP_INCLUDED
+
+#include "lbann_config.hpp"
+
+#include "lbann/operators/elementwise_operator.hpp"
+#include "lbann/utils/cloneable.hpp"
+
+#include <operators.pb.h>
+
+/** @file
+ *
+ *  These operators are idiomatic replacements for patterns like:
+ *
+ *    Op(layer, ConstantLayer(1))
+ *
+ *  where it's a pessimization to actually allocate a persistent array
+ *  for such an ephemeral operation.
+ */
+
+#include "lbann/operators/elementwise_operator.hpp"
+#include "lbann/operators/operator.hpp"
+#include "lbann/utils/cloneable.hpp"
+
+#include <operators.pb.h>
+
+// Declares OP_NAME##Operator: a single-type (input and output are both DataT)
+// elementwise operator holding one constant; OP_STRING is its get_type() name.
+#define LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(OP_NAME, OP_STRING)        \
+  template <typename DataT, El::Device D>                                      \
+  class OP_NAME##Operator final                                                \
+    : public Cloneable<OP_NAME##Operator<DataT, D>,                            \
+                       ElementwiseOperator<DataT, DataT, D>>                   \
+  {                                                                            \
+    using BaseType = Cloneable<OP_NAME##Operator<DataT, D>,                    \
+                               ElementwiseOperator<DataT, DataT, D>>;          \
+    using LocalInputTensorType = typename BaseType::LocalInputTensorType;      \
+    using LocalOutputTensorType = typename BaseType::LocalOutputTensorType;    \
+    using ConstLocalInputTensorType =                                          \
+      typename BaseType::ConstLocalInputTensorType;                            \
+    using ConstLocalOutputTensorType =                                         \
+      typename BaseType::ConstLocalOutputTensorType;                           \
+                                                                               \
+  public: /* The double argument is converted with El::To<DataT>. */           \
+    OP_NAME##Operator(double constant = 0.)                                    \
+      : m_constant{El::To<DataT>(constant)}                                    \
+    {}                                                                         \
+    OP_NAME##Operator(OP_NAME##Operator&&) = default;                          \
+    OP_NAME##Operator(OP_NAME##Operator const&) = default;                     \
+    OP_NAME##Operator& operator=(OP_NAME##Operator&&) = default;               \
+    OP_NAME##Operator& operator=(OP_NAME##Operator const&) = default;          \
+    ~OP_NAME##Operator() = default;                                            \
+    std::string get_type() const final { return OP_STRING; }                   \
+    template <typename ArchiveT>                                               \
+    void serialize(ArchiveT& ar)                                               \
+    {                                                                          \
+      using OperatorType = ElementwiseOperator<DataT, DataT, D>;               \
+      ar(::cereal::make_nvp("ElementwiseOperator",                             \
+                            ::cereal::base_class<OperatorType>(this)),         \
+         CEREAL_NVP(m_constant));                                              \
+    }                                                                          \
+    DataT get_constant() const noexcept { return m_constant; }                 \
+                                                                               \
+  private: /* Compute kernels: declared here, defined out of line. */          \
+    void                                                                       \
+    fp_compute_local(std::vector<ConstLocalInputTensorType> inputs,            \
+                     std::vector<LocalOutputTensorType> outputs) const final;  \
+    void bp_compute_local(                                                     \
+      std::vector<ConstLocalInputTensorType> inputs,                           \
+      std::vector<ConstLocalOutputTensorType> grads_wrt_outputs,               \
+      std::vector<LocalInputTensorType> grads_wrt_inputs) const final;         \
+    void set_proto_params(lbann_data::Operator& msg) const final               \
+    { /* Pack m_constant into the OP_NAME protobuf message. */                 \
+      lbann_data::OP_NAME##Operator op_msg;                                    \
+      op_msg.set_constant(m_constant);                                         \
+      msg.mutable_parameters()->PackFrom(op_msg);                              \
+    }                                                                          \
+    void do_fill_description(description& desc) const final                    \
+    { /* Stream m_constant to text and add it as "Constant". */                \
+      std::ostringstream oss;                                                  \
+      oss << m_constant;                                                       \
+      desc.add("Constant", oss.str());                                         \
+    }                                                                          \
+                                                                               \
+  private:                                                                     \
+    DataT m_constant;                                                          \
+  }
+
+namespace lbann {
+
+// x + c -- treated as commutative.
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(AddConstant, "add constant");
+
+// x * c -- treated as commutative.
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(Scale, "scale");
+
+// x - C -- yes, could be "plus -C", but so could 7-4 be 7+-4, but
+// nobody writes that.
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(SubtractConstant,
+                                            "subtract constant");
+// C - x -- yes, could be "negative-x plus C", but again, why write
+// -4+7 when you could just write 7-4...
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(ConstantSubtract,
+                                            "subtract from constant");
+
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(MaxConstant, "max constant");
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(MinConstant, "min constant");
+
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(EqualConstant, "equals constant");
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(NotEqualConstant,
+                                            "not equals constant");
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(LessEqualConstant,
+                                            "less-equals constant");
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(LessConstant, "less than constant");
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(GreaterEqualConstant,
+                                            "greater-equals constant");
+LBANN_DECLARE_BINARY_WITH_CONSTANT_OPERATOR(GreaterConstant,
+                                            "greater than constant");
+
+} // namespace lbann
+#endif // LBANN_INCLUDE_LBANN_OPERATORS_BINARY_WITH_CONSTANT_HPP_INCLUDED
diff --git a/include/lbann/operators/math/math_builders.hpp b/include/lbann/operators/math/math_builders.hpp
index 7a232d5927f..f2f4788b002 100644
--- a/include/lbann/operators/math/math_builders.hpp
+++ b/include/lbann/operators/math/math_builders.hpp
@@ -26,8 +26,8 @@
 #ifndef LBANN_INCLUDE_LBANN_OPERATORS_MATH_MATH_BUILDERS_HPP_INCLUDED
 #define LBANN_INCLUDE_LBANN_OPERATORS_MATH_MATH_BUILDERS_HPP_INCLUDED
 
-#include "lbann/operators/operator.hpp"
 #include "lbann/operators/builder_macros.hpp"
+#include "lbann/operators/operator.hpp"
 
 namespace lbann {
 
@@ -38,25 +38,32 @@ build_abs_operator(lbann_data::Operator const& op);
 LBANN_DECLARE_OPERATOR_BUILDER(acos);
 LBANN_DECLARE_OPERATOR_BUILDER(acosh);
 LBANN_DECLARE_OPERATOR_BUILDER(add);
+LBANN_DECLARE_OPERATOR_BUILDER(add_constant);
 LBANN_DECLARE_OPERATOR_BUILDER(asin);
 LBANN_DECLARE_OPERATOR_BUILDER(asinh);
 LBANN_DECLARE_OPERATOR_BUILDER(atan);
 LBANN_DECLARE_OPERATOR_BUILDER(atanh);
 LBANN_DECLARE_OPERATOR_BUILDER(ceil);
 LBANN_DECLARE_OPERATOR_BUILDER(clamp);
+LBANN_DECLARE_OPERATOR_BUILDER(constant_subtract);
 LBANN_DECLARE_OPERATOR_BUILDER(cos);
 LBANN_DECLARE_OPERATOR_BUILDER(cosh);
 LBANN_DECLARE_OPERATOR_BUILDER(divide);
 LBANN_DECLARE_OPERATOR_BUILDER(equal);
+LBANN_DECLARE_OPERATOR_BUILDER(equal_constant);
 LBANN_DECLARE_OPERATOR_BUILDER(erf);
 LBANN_DECLARE_OPERATOR_BUILDER(erfinv);
 LBANN_DECLARE_OPERATOR_BUILDER(exp);
 LBANN_DECLARE_OPERATOR_BUILDER(expm1);
 LBANN_DECLARE_OPERATOR_BUILDER(floor);
 LBANN_DECLARE_OPERATOR_BUILDER(greater);
+LBANN_DECLARE_OPERATOR_BUILDER(greater_constant);
 LBANN_DECLARE_OPERATOR_BUILDER(greater_equal);
+LBANN_DECLARE_OPERATOR_BUILDER(greater_equal_constant);
 LBANN_DECLARE_OPERATOR_BUILDER(less);
+LBANN_DECLARE_OPERATOR_BUILDER(less_constant);
 LBANN_DECLARE_OPERATOR_BUILDER(less_equal);
+LBANN_DECLARE_OPERATOR_BUILDER(less_equal_constant);
 LBANN_DECLARE_OPERATOR_BUILDER(log);
 LBANN_DECLARE_OPERATOR_BUILDER(log1p);
 LBANN_DECLARE_OPERATOR_BUILDER(logical_and);
@@ -64,17 +71,21 @@ LBANN_DECLARE_OPERATOR_BUILDER(logical_not);
 LBANN_DECLARE_OPERATOR_BUILDER(logical_or);
 LBANN_DECLARE_OPERATOR_BUILDER(logical_xor);
 LBANN_DECLARE_OPERATOR_BUILDER(max);
+LBANN_DECLARE_OPERATOR_BUILDER(max_constant);
 LBANN_DECLARE_OPERATOR_BUILDER(min);
+LBANN_DECLARE_OPERATOR_BUILDER(min_constant);
 LBANN_DECLARE_OPERATOR_BUILDER(mod);
 LBANN_DECLARE_OPERATOR_BUILDER(multiply);
 LBANN_DECLARE_OPERATOR_BUILDER(negative);
 LBANN_DECLARE_OPERATOR_BUILDER(not_equal);
+LBANN_DECLARE_OPERATOR_BUILDER(not_equal_constant);
 LBANN_DECLARE_OPERATOR_BUILDER(pow);
 LBANN_DECLARE_OPERATOR_BUILDER(reciprocal);
 LBANN_DECLARE_OPERATOR_BUILDER(round);
 LBANN_DECLARE_OPERATOR_BUILDER(rsqrt);
 LBANN_DECLARE_OPERATOR_BUILDER(safe_divide);
 LBANN_DECLARE_OPERATOR_BUILDER(safe_reciprocal);
+LBANN_DECLARE_OPERATOR_BUILDER(scale);
 LBANN_DECLARE_OPERATOR_BUILDER(sign);
 LBANN_DECLARE_OPERATOR_BUILDER(sin);
 LBANN_DECLARE_OPERATOR_BUILDER(sinh);
@@ -82,6 +93,7 @@ LBANN_DECLARE_OPERATOR_BUILDER(sqrt);
 LBANN_DECLARE_OPERATOR_BUILDER(square);
 LBANN_DECLARE_OPERATOR_BUILDER(squared_difference);
 LBANN_DECLARE_OPERATOR_BUILDER(subtract);
+LBANN_DECLARE_OPERATOR_BUILDER(subtract_constant);
 LBANN_DECLARE_OPERATOR_BUILDER(tan);
 LBANN_DECLARE_OPERATOR_BUILDER(tanh);
 
diff --git a/include/lbann/operators/math/math_builders_impl.hpp b/include/lbann/operators/math/math_builders_impl.hpp
index 0f69d3417f2..489693aa4b1 100644
--- a/include/lbann/operators/math/math_builders_impl.hpp
+++ b/include/lbann/operators/math/math_builders_impl.hpp
@@ -30,10 +30,12 @@
 
 #include "lbann/operators/math/abs.hpp"
 #include "lbann/operators/math/binary.hpp"
+#include "lbann/operators/math/binary_with_constant.hpp"
 #include "lbann/operators/math/clamp.hpp"
 #include "lbann/operators/math/unary.hpp"
 
 #include "lbann/proto/datatype_helpers.hpp"
+#include <operators.pb.h>
 
 template <typename DataT, El::Device D>
 std::unique_ptr<lbann::Operator<DataT, DataT, D>>
@@ -53,6 +55,33 @@ lbann::build_abs_operator(lbann_data::Operator const& op)
   return std::make_unique<AbsOperator<DataT, D>>();
 }
 
+#define LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(OP_NAME, OP_LOWER_NAME)         \
+  template <typename DataT, El::Device D>                                      \
+  std::unique_ptr<lbann::Operator<DataT, DataT, D>>                            \
+    lbann::build_##OP_LOWER_NAME##_operator(lbann_data::Operator const& op)    \
+  { /* Check types, unpack the OP_NAME message, build from its constant. */    \
+    details::AssertConsistentTypeParameters<DataT, DataT, D>(op);              \
+    lbann_data::OP_NAME##Operator params;                                      \
+    LBANN_ASSERT(op.parameters().UnpackTo(&params));                           \
+    return std::make_unique<OP_NAME##Operator<DataT, D>>(params.constant());   \
+  }
+
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(AddConstant, add_constant)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(ConstantSubtract, constant_subtract)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(EqualConstant, equal_constant)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(GreaterConstant, greater_constant)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(GreaterEqualConstant,
+                                       greater_equal_constant)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(LessConstant, less_constant)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(LessEqualConstant, less_equal_constant)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(MaxConstant, max_constant)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(MinConstant, min_constant)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(NotEqualConstant, not_equal_constant)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(Scale, scale)
+LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER(SubtractConstant, subtract_constant)
+
+#undef LBANN_DEFINE_BIN_WITH_CONSTANT_BUILDER
+
 LBANN_DEFINE_OPERATOR_BUILDER(acos, Acos)
 LBANN_DEFINE_OPERATOR_BUILDER(acosh, Acosh)
 LBANN_DEFINE_OPERATOR_BUILDER(add, Add)
diff --git a/include/lbann/proto/operator_factory_impl.hpp b/include/lbann/proto/operator_factory_impl.hpp
index 88ab5cb13be..1f46605f545 100644
--- a/include/lbann/proto/operator_factory_impl.hpp
+++ b/include/lbann/proto/operator_factory_impl.hpp
@@ -54,6 +54,7 @@ OperatorFactory<InT, OutT, D> build_default_factory()
     LBANN_REGISTER_BUILDER(Acos, acos);
     LBANN_REGISTER_BUILDER(Acosh, acosh);
     LBANN_REGISTER_BUILDER(Add, add);
+    LBANN_REGISTER_BUILDER(AddConstant, add_constant);
     LBANN_REGISTER_BUILDER(Asin, asin);
     LBANN_REGISTER_BUILDER(Asinh, asinh);
     LBANN_REGISTER_BUILDER(Atan, atan);
@@ -64,19 +65,25 @@ OperatorFactory<InT, OutT, D> build_default_factory()
     LBANN_REGISTER_BUILDER(BooleanFalsePositive, boolean_false_positive);
     LBANN_REGISTER_BUILDER(Ceil, ceil);
     LBANN_REGISTER_BUILDER(Clamp, clamp);
+    LBANN_REGISTER_BUILDER(ConstantSubtract, constant_subtract);
     LBANN_REGISTER_BUILDER(Cos, cos);
     LBANN_REGISTER_BUILDER(Cosh, cosh);
     LBANN_REGISTER_BUILDER(Divide, divide);
     LBANN_REGISTER_BUILDER(Equal, equal);
+    LBANN_REGISTER_BUILDER(EqualConstant, equal_constant);
     LBANN_REGISTER_BUILDER(Erf, erf);
     LBANN_REGISTER_BUILDER(ErfInv, erfinv);
     LBANN_REGISTER_BUILDER(Exp, exp);
     LBANN_REGISTER_BUILDER(Expm1, expm1);
     LBANN_REGISTER_BUILDER(Floor, floor);
     LBANN_REGISTER_BUILDER(Greater, greater);
+    LBANN_REGISTER_BUILDER(GreaterConstant, greater_constant);
     LBANN_REGISTER_BUILDER(GreaterEqual, greater_equal);
+    LBANN_REGISTER_BUILDER(GreaterEqualConstant, greater_equal_constant);
     LBANN_REGISTER_BUILDER(Less, less);
+    LBANN_REGISTER_BUILDER(LessConstant, less_constant);
     LBANN_REGISTER_BUILDER(LessEqual, less_equal);
+    LBANN_REGISTER_BUILDER(LessEqualConstant, less_equal_constant);
     LBANN_REGISTER_BUILDER(Log, log);
     LBANN_REGISTER_BUILDER(Log1p, log1p);
     LBANN_REGISTER_BUILDER(LogSigmoid, log_sigmoid);
@@ -85,17 +92,21 @@ OperatorFactory<InT, OutT, D> build_default_factory()
     LBANN_REGISTER_BUILDER(LogicalOr, logical_or);
     LBANN_REGISTER_BUILDER(LogicalXor, logical_xor);
     LBANN_REGISTER_BUILDER(Max, max);
+    LBANN_REGISTER_BUILDER(MaxConstant, max_constant);
     LBANN_REGISTER_BUILDER(Min, min);
+    LBANN_REGISTER_BUILDER(MinConstant, min_constant);
     LBANN_REGISTER_BUILDER(Mod, mod);
     LBANN_REGISTER_BUILDER(Multiply, multiply);
     LBANN_REGISTER_BUILDER(Negative, negative);
     LBANN_REGISTER_BUILDER(NotEqual, not_equal);
+    LBANN_REGISTER_BUILDER(NotEqualConstant, not_equal_constant);
     LBANN_REGISTER_BUILDER(Pow, pow);
     LBANN_REGISTER_BUILDER(Reciprocal, reciprocal);
     LBANN_REGISTER_BUILDER(Round, round);
     LBANN_REGISTER_BUILDER(Rsqrt, rsqrt);
     LBANN_REGISTER_BUILDER(SafeDivide, safe_divide);
     LBANN_REGISTER_BUILDER(SafeReciprocal, safe_reciprocal);
+    LBANN_REGISTER_BUILDER(Scale, scale);
     LBANN_REGISTER_BUILDER(Selu, selu);
     LBANN_REGISTER_BUILDER(Sigmoid, sigmoid);
     LBANN_REGISTER_BUILDER(SigmoidBinaryCrossEntropy,
@@ -109,6 +120,7 @@ OperatorFactory<InT, OutT, D> build_default_factory()
     LBANN_REGISTER_BUILDER(Square, square);
     LBANN_REGISTER_BUILDER(SquaredDifference, squared_difference);
     LBANN_REGISTER_BUILDER(Subtract, subtract);
+    LBANN_REGISTER_BUILDER(SubtractConstant, subtract_constant);
     LBANN_REGISTER_BUILDER(Tan, tan);
     LBANN_REGISTER_BUILDER(Tanh, tanh);
   }
diff --git a/src/operators/math/CMakeLists.txt b/src/operators/math/CMakeLists.txt
index 99708065416..a557eb4d77a 100644
--- a/src/operators/math/CMakeLists.txt
+++ b/src/operators/math/CMakeLists.txt
@@ -4,6 +4,7 @@ set_full_path(THIS_DIR_SOURCES
 
   abs.cpp
   binary.cpp
+  binary_with_constant.cpp
   clamp.cpp
   math_builders.cpp
   unary.cpp
@@ -16,6 +17,7 @@ if (LBANN_HAS_GPU)
 
     abs.cu
     binary.cu
+    binary_with_constant.cu
     clamp.cu
     unary.cu
     )
diff --git a/src/operators/math/binary_with_constant.cpp b/src/operators/math/binary_with_constant.cpp
new file mode 100644
index 00000000000..fa4491b81c9
--- /dev/null
+++ b/src/operators/math/binary_with_constant.cpp
@@ -0,0 +1,378 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+
+#include "common.hpp"
+#include "lbann_config.hpp"
+#include <hydrogen/meta/TypeTraits.hpp>
+
+namespace lbann {
+
+// Forward pass: writes x + c for every entry x of the single input.
+template <typename DataT, El::Device D>
+void AddConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const add_c =
+    [this](DataT const& in) { return in + this->m_constant; };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), add_c);
+}
+
+template <typename DataT, El::Device D>
+void AddConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the output gradient is copied to the input gradient.
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Copy(gradient_wrt_outputs.front().data(),
+           gradient_wrt_inputs.front().data());
+}
+
+// Forward pass: writes x * c for every entry x of the single input.
+template <typename DataT, El::Device D>
+void ScaleOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const scale_c =
+    [this](DataT const& in) { return in * this->m_constant; };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), scale_c);
+}
+
+template <typename DataT, El::Device D>
+void ScaleOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: each output-gradient entry is multiplied by c.
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  std::function<DataT(DataT const&)> const scale_c =
+    [this](DataT const& dy) { return dy * this->m_constant; };
+  El::EntrywiseMap(gradient_wrt_outputs.front().data(),
+                   gradient_wrt_inputs.front().data(), scale_c);
+}
+
+// Forward pass: writes x - c for every entry x of the single input.
+template <typename DataT, El::Device D>
+void SubtractConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const sub_c =
+    [this](DataT const& in) { return in - this->m_constant; };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), sub_c);
+}
+
+template <typename DataT, El::Device D>
+void SubtractConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the output gradient is copied to the input gradient.
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Copy(gradient_wrt_outputs.front().data(),
+           gradient_wrt_inputs.front().data());
+}
+
+// Forward pass: writes c - x for every entry x of the single input.
+template <typename DataT, El::Device D>
+void ConstantSubtractOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const c_minus =
+    [this](DataT const& in) { return this->m_constant - in; };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), c_minus);
+}
+
+template <typename DataT, El::Device D>
+void ConstantSubtractOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the input gradient is the negated output gradient.
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  std::function<DataT(DataT const&)> const negate =
+    [](DataT const& dy) { return -dy; };
+  El::EntrywiseMap(gradient_wrt_outputs.front().data(),
+                   gradient_wrt_inputs.front().data(), negate);
+}
+
+// Forward pass: writes std::max(c, x) for every entry x of the single
+// input; the constant is deliberately kept as the first argument.
+template <typename DataT, El::Device D>
+void MaxConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const max_c =
+    [this](DataT const& in) { return std::max(this->m_constant, in); };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), max_c);
+}
+
+// Backward pass: dx = 0 where x < c, dy where x > c, and dy / 2 otherwise.
+template <typename DataT, El::Device D>
+void MaxConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  auto const route = [this](DataT const& in, DataT const& g) {
+    auto const& k = this->m_constant;
+    return (in < k ? El::TypeTraits<DataT>::Zero() : (in > k ? g : g / 2));
+  };
+  internal::EntrywiseZipInto(inputs.front().data(),
+                             gradient_wrt_outputs.front().data(),
+                             gradient_wrt_inputs.front().data(), route);
+}
+
+// Forward pass: writes std::min(c, x) for every entry x of the single
+// input; the constant is deliberately kept as the first argument.
+template <typename DataT, El::Device D>
+void MinConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const min_c =
+    [this](DataT const& in) { return std::min(this->m_constant, in); };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), min_c);
+}
+
+// Backward pass: dx = dy where x < c, 0 where x > c, and dy / 2 otherwise.
+template <typename DataT, El::Device D>
+void MinConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  auto const route = [this](DataT const& in, DataT const& g) {
+    auto const& k = this->m_constant;
+    return (in < k ? g : (in > k ? El::TypeTraits<DataT>::Zero() : g / 2));
+  };
+  internal::EntrywiseZipInto(inputs.front().data(),
+                             gradient_wrt_outputs.front().data(),
+                             gradient_wrt_inputs.front().data(), route);
+}
+
+// Forward pass: writes One() where c == x and Zero() everywhere else.
+template <typename DataT, El::Device D>
+void EqualConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const is_equal =
+    [this](DataT const& in) {
+      bool const hit = (this->m_constant == in);
+      return hit ? El::TypeTraits<DataT>::One() : El::TypeTraits<DataT>::Zero();
+    };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), is_equal);
+}
+
+template <typename DataT, El::Device D>
+void EqualConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the input gradient is set to zero; other args are unused.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data());
+}
+
+// Forward pass: writes Zero() where c == x and One() everywhere else.
+template <typename DataT, El::Device D>
+void NotEqualConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const differs =
+    [this](DataT const& in) {
+      bool const same = (this->m_constant == in);
+      return same ? El::TypeTraits<DataT>::Zero() : El::TypeTraits<DataT>::One();
+    };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), differs);
+}
+
+template <typename DataT, El::Device D>
+void NotEqualConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the input gradient is set to zero; other args are unused.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data());
+}
+
+// Forward pass: writes One() where x < c and Zero() everywhere else.
+template <typename DataT, El::Device D>
+void LessConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const is_less =
+    [this](DataT const& in) {
+      bool const hit = (in < this->m_constant);
+      return hit ? El::TypeTraits<DataT>::One() : El::TypeTraits<DataT>::Zero();
+    };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), is_less);
+}
+
+template <typename DataT, El::Device D>
+void LessConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the input gradient is set to zero; other args are unused.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data());
+}
+
+// Forward pass: writes One() where x <= c and Zero() everywhere else.
+template <typename DataT, El::Device D>
+void LessEqualConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const is_le =
+    [this](DataT const& in) {
+      bool const hit = (in <= this->m_constant);
+      return hit ? El::TypeTraits<DataT>::One() : El::TypeTraits<DataT>::Zero();
+    };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), is_le);
+}
+
+template <typename DataT, El::Device D>
+void LessEqualConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the input gradient is set to zero; other args are unused.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data());
+}
+
+// Forward pass: writes One() where c < x and Zero() everywhere else.
+template <typename DataT, El::Device D>
+void GreaterConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const is_greater =
+    [this](DataT const& in) {
+      bool const hit = (this->m_constant < in);
+      return hit ? El::TypeTraits<DataT>::One() : El::TypeTraits<DataT>::Zero();
+    };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), is_greater);
+}
+
+template <typename DataT, El::Device D>
+void GreaterConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the input gradient is set to zero; other args are unused.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data());
+}
+
+// Forward pass: writes One() where c <= x and Zero() everywhere else.
+template <typename DataT, El::Device D>
+void GreaterEqualConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  std::function<DataT(DataT const&)> const is_ge =
+    [this](DataT const& in) {
+      bool const hit = (this->m_constant <= in);
+      return hit ? El::TypeTraits<DataT>::One() : El::TypeTraits<DataT>::Zero();
+    };
+  El::EntrywiseMap(inputs.front().data(), outputs.front().data(), is_ge);
+}
+
+template <typename DataT, El::Device D>
+void GreaterEqualConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the input gradient is set to zero; other args are unused.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data());
+}
+
+#define PROTO(T) /* Explicit CPU instantiations of the twelve operators. */    \
+  template class AddConstantOperator<T, El::Device::CPU>;                      \
+  template class ScaleOperator<T, El::Device::CPU>;                            \
+  template class SubtractConstantOperator<T, El::Device::CPU>;                 \
+  template class ConstantSubtractOperator<T, El::Device::CPU>;                 \
+  template class MaxConstantOperator<T, El::Device::CPU>;                      \
+  template class MinConstantOperator<T, El::Device::CPU>;                      \
+  template class EqualConstantOperator<T, El::Device::CPU>;                    \
+  template class NotEqualConstantOperator<T, El::Device::CPU>;                 \
+  template class LessConstantOperator<T, El::Device::CPU>;                     \
+  template class LessEqualConstantOperator<T, El::Device::CPU>;                \
+  template class GreaterConstantOperator<T, El::Device::CPU>;                  \
+  template class GreaterEqualConstantOperator<T, El::Device::CPU>
+// NOTE(review): instantiate.hpp presumably expands PROTO per type -- confirm.
+#define LBANN_INSTANTIATE_CPU_HALF
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/operators/math/binary_with_constant.cu b/src/operators/math/binary_with_constant.cu
new file mode 100644
index 00000000000..34addb78c08
--- /dev/null
+++ b/src/operators/math/binary_with_constant.cu
@@ -0,0 +1,507 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/gpu/helpers.hpp"
+
+#include "common.cuh"
+
+namespace {
+// Device lambdas cannot be used in functions with private or
+// protected access within their class. So we move them to functions
+// that aren't in their class at all. "The only problem you can't
+// solve with more indirection is too much indirection."
+// Entrywise forward pass on the GPU: out = in + c.
+template <typename T>
+void ApplyAddFP(T c,
+                El::Matrix<T, El::Device::GPU> const& in,
+                El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return val + c;
+  });
+}
+
+// Entrywise forward pass on the GPU: out = in * c.
+template <typename T>
+void ApplyScaleFP(T c,
+                  El::Matrix<T, El::Device::GPU> const& in,
+                  El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return val * c;
+  });
+}
+
+// Entrywise forward pass on the GPU: out = in - c.
+template <typename T>
+void ApplySubtractFP(T c,
+                     El::Matrix<T, El::Device::GPU> const& in,
+                     El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return val - c;
+  });
+}
+
+// Entrywise forward pass on the GPU: out = c - in.
+template <typename T>
+void ApplyCSubtractFP(T c,
+                      El::Matrix<T, El::Device::GPU> const& in,
+                      El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return c - val;
+  });
+}
+
+// Entrywise backward pass of (c - x) on the GPU: grad_wrt_in = -grad_wrt_out.
+template <typename T>
+void ApplyCSubtractBP(El::Matrix<T, El::Device::GPU> const& grad_wrt_out,
+                      El::Matrix<T, El::Device::GPU>& grad_wrt_in)
+{
+  El::EntrywiseMap(grad_wrt_out, grad_wrt_in, [] __device__(T const& dy) {
+    return -dy;
+  });
+}
+
+// Entrywise forward pass on the GPU: out = max(c, in).
+template <typename T>
+void ApplyMaxFP(T c,
+                El::Matrix<T, El::Device::GPU> const& in,
+                El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return ::lbann::gpu_lib::max(c, val);
+  });
+}
+
+// Entrywise backward pass of max(x, c) on the GPU: the gradient is zero
+// where x < c, dy where x > c, and dy / 2 otherwise (e.g. when x == c).
+template <typename T>
+void ApplyMaxBP(T c,
+                El::Matrix<T, El::Device::GPU> const& in,
+                El::Matrix<T, El::Device::GPU> const& grad_wrt_out,
+                El::Matrix<T, El::Device::GPU>& grad_wrt_in)
+{
+  ::lbann::internal::EntrywiseZipInto(in, grad_wrt_out, grad_wrt_in,
+    [c] __device__(T const& val, T const& grad) {
+      return (val < c ? (T) 0. : (val > c ? grad : grad / (T) 2.));
+    });
+}
+
+// Entrywise forward pass on the GPU: out = min(c, in).
+template <typename T>
+void ApplyMinFP(T c,
+                El::Matrix<T, El::Device::GPU> const& in,
+                El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return ::lbann::gpu_lib::min(c, val);
+  });
+}
+
+// Backward pass of min(x, c): dy where x < c, 0 where x > c, else dy / 2.
+template <typename T>
+void ApplyMinBP(T c,
+                El::Matrix<T, El::Device::GPU> const& in,
+                El::Matrix<T, El::Device::GPU> const& grad_wrt_out,
+                El::Matrix<T, El::Device::GPU>& grad_wrt_in)
+{
+  ::lbann::internal::EntrywiseZipInto(in, grad_wrt_out, grad_wrt_in,
+    [c] __device__(T const& val, T const& grad) {
+      return (val < c ? grad : (val > c ? (T) 0. : grad / (T) 2.));
+    });
+}
+
+// Entrywise forward pass on the GPU: out = 1 where c == in, else 0.
+template <typename T>
+void ApplyEqualFP(T c,
+                  El::Matrix<T, El::Device::GPU> const& in,
+                  El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return (c == val ? (T) 1. : (T) 0.);
+  });
+}
+
+// Entrywise forward pass on the GPU: out = 0 where c == in, else 1.
+template <typename T>
+void ApplyNotEqualFP(T c,
+                     El::Matrix<T, El::Device::GPU> const& in,
+                     El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return (c == val ? (T) 0. : (T) 1.);
+  });
+}
+
+// Entrywise forward pass on the GPU: out = 1 where in < c, else 0.
+template <typename T>
+void ApplyLessFP(T c,
+                 El::Matrix<T, El::Device::GPU> const& in,
+                 El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return (val < c ? (T) 1. : (T) 0.);
+  });
+}
+
+// Entrywise forward pass on the GPU: out = 1 where c < in, else 0.
+template <typename T>
+void ApplyGreaterFP(T c,
+                    El::Matrix<T, El::Device::GPU> const& in,
+                    El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return (c < val ? (T) 1. : (T) 0.);
+  });
+}
+
+// Entrywise forward pass on the GPU: out = 1 where in <= c, else 0.
+template <typename T>
+void ApplyLessEqualFP(T c,
+                      El::Matrix<T, El::Device::GPU> const& in,
+                      El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return (val <= c ? (T) 1. : (T) 0.);
+  });
+}
+
+// Entrywise forward pass on the GPU: out = 1 where c <= in, else 0.
+template <typename T>
+void ApplyGreaterEqualFP(T c,
+                         El::Matrix<T, El::Device::GPU> const& in,
+                         El::Matrix<T, El::Device::GPU>& out)
+{
+  El::EntrywiseMap(in, out, [c] __device__(T const& val) {
+    return (c <= val ? (T) 1. : (T) 0.);
+  });
+}
+
+} // namespace
+
+namespace lbann {
+
+template <typename DataT, El::Device D>
+void AddConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyAddFP(this->m_constant, in_mat, out_mat); // out = in + c
+}
+
+template <typename DataT, El::Device D>
+void AddConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  auto const& dy = gradient_wrt_outputs.front().data();
+  El::Copy(dy, gradient_wrt_inputs.front().data()); // d(x + c)/dx = 1
+}
+
+template <typename DataT, El::Device D>
+void ScaleOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyScaleFP(this->m_constant, in_mat, out_mat); // out = in * c
+}
+
+template <typename DataT, El::Device D>
+void ScaleOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: d(c * x)/dx = c, so the forward scaling is reused on dy.
+  this->fp_compute_local(std::move(gradient_wrt_outputs),
+                         std::move(gradient_wrt_inputs));
+}
+
+template <typename DataT, El::Device D>
+void SubtractConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplySubtractFP(this->m_constant, in_mat, out_mat); // out = in - c
+}
+
+template <typename DataT, El::Device D>
+void SubtractConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  auto const& dy = gradient_wrt_outputs.front().data();
+  El::Copy(dy, gradient_wrt_inputs.front().data()); // d(x - c)/dx = 1
+}
+
+template <typename DataT, El::Device D>
+void ConstantSubtractOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyCSubtractFP(this->m_constant, in_mat, out_mat); // out = c - in
+}
+
+template <typename DataT, El::Device D>
+void ConstantSubtractOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  auto const& dy = gradient_wrt_outputs.front().data();
+  ApplyCSubtractBP(dy, gradient_wrt_inputs.front().data()); // dx = -dy
+}
+
+template <typename DataT, El::Device D>
+void MaxConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyMaxFP(this->m_constant, in_mat, out_mat); // out = max(c, in)
+}
+
+template <typename DataT, El::Device D>
+void MaxConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto const& dy = gradient_wrt_outputs.front().data();
+  auto& dx = gradient_wrt_inputs.front().data();
+  ApplyMaxBP(this->m_constant, in_mat, dy, dx); // gradient flows where x > c
+}
+
+template <typename DataT, El::Device D>
+void MinConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyMinFP(this->m_constant, in_mat, out_mat); // out = min(c, in)
+}
+
+template <typename DataT, El::Device D>
+void MinConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<ConstLocalOutputTensorType> gradient_wrt_outputs,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_outputs.size() == 1);
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto const& dy = gradient_wrt_outputs.front().data();
+  auto& dx = gradient_wrt_inputs.front().data();
+  ApplyMinBP(this->m_constant, in_mat, dy, dx); // gradient flows where x < c
+}
+
+template <typename DataT, El::Device D>
+void EqualConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyEqualFP(this->m_constant, in_mat, out_mat); // 1 where c == in
+}
+
+template <typename DataT, El::Device D>
+void EqualConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the 0/1 comparison result is given a zero gradient.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data()); // dL/dx = 0
+}
+
+template <typename DataT, El::Device D>
+void NotEqualConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyNotEqualFP(this->m_constant, in_mat, out_mat); // 0 where c == in
+}
+
+template <typename DataT, El::Device D>
+void NotEqualConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the 0/1 comparison result is given a zero gradient.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data()); // dL/dx = 0
+}
+
+template <typename DataT, El::Device D>
+void LessConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyLessFP(this->m_constant, in_mat, out_mat); // 1 where in < c
+}
+
+template <typename DataT, El::Device D>
+void LessConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the 0/1 comparison result is given a zero gradient.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data()); // dL/dx = 0
+}
+
+template <typename DataT, El::Device D>
+void LessEqualConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyLessEqualFP(this->m_constant, in_mat, out_mat); // 1 where in <= c
+}
+
+template <typename DataT, El::Device D>
+void LessEqualConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the 0/1 comparison result is given a zero gradient.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data()); // dL/dx = 0
+}
+
+template <typename DataT, El::Device D>
+void GreaterConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyGreaterFP(this->m_constant, in_mat, out_mat); // 1 where c < in
+}
+
+template <typename DataT, El::Device D>
+void GreaterConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the 0/1 comparison result is given a zero gradient.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data()); // dL/dx = 0
+}
+
+template <typename DataT, El::Device D>
+void GreaterEqualConstantOperator<DataT, D>::fp_compute_local(
+  std::vector<ConstLocalInputTensorType> inputs,
+  std::vector<LocalOutputTensorType> outputs) const
+{
+  LBANN_ASSERT_DEBUG(inputs.size() == 1);
+  LBANN_ASSERT_DEBUG(outputs.size() == 1);
+  auto const& in_mat = inputs.front().data();
+  auto& out_mat = outputs.front().data();
+  ApplyGreaterEqualFP(this->m_constant, in_mat, out_mat); // 1 where c <= in
+}
+
+template <typename DataT, El::Device D>
+void GreaterEqualConstantOperator<DataT, D>::bp_compute_local(
+  std::vector<ConstLocalInputTensorType> /*inputs*/,
+  std::vector<ConstLocalOutputTensorType> /*gradient_wrt_outputs*/,
+  std::vector<LocalInputTensorType> gradient_wrt_inputs) const
+{ // Backward pass: the 0/1 comparison result is given a zero gradient.
+  LBANN_ASSERT_DEBUG(gradient_wrt_inputs.size() == 1);
+  El::Zero(gradient_wrt_inputs.front().data()); // dL/dx = 0
+}
+
+#define PROTO(T)                                                               \
+  template class AddConstantOperator<T, El::Device::GPU>;                      \
+  template class ScaleOperator<T, El::Device::GPU>;                            \
+  template class SubtractConstantOperator<T, El::Device::GPU>;                 \
+  template class ConstantSubtractOperator<T, El::Device::GPU>;                 \
+  template class MaxConstantOperator<T, El::Device::GPU>;                      \
+  template class MinConstantOperator<T, El::Device::GPU>;                      \
+  template class EqualConstantOperator<T, El::Device::GPU>;                    \
+  template class NotEqualConstantOperator<T, El::Device::GPU>;                 \
+  template class LessConstantOperator<T, El::Device::GPU>;                     \
+  template class LessEqualConstantOperator<T, El::Device::GPU>;                \
+  template class GreaterConstantOperator<T, El::Device::GPU>;                  \
+  template class GreaterEqualConstantOperator<T, El::Device::GPU>
+// NOTE(review): instantiate.hpp presumably expands PROTO(T) per data type.
+#define LBANN_INSTANTIATE_GPU_HALF
+#include "lbann/macros/instantiate.hpp"
+
+} // namespace lbann
diff --git a/src/operators/math/cereal_registration/CMakeLists.txt b/src/operators/math/cereal_registration/CMakeLists.txt
index dccb614e18f..0947c3c29e3 100644
--- a/src/operators/math/cereal_registration/CMakeLists.txt
+++ b/src/operators/math/cereal_registration/CMakeLists.txt
@@ -4,16 +4,19 @@ set_full_path(THIS_DIR_SOURCES
   acos.cpp
   acosh.cpp
   add.cpp
+  add_constant.cpp
   asin.cpp
   asinh.cpp
   atan.cpp
   atanh.cpp
   ceil.cpp
   clamp.cpp
+  constant_subtract.cpp
   cos.cpp
   cosh.cpp
   divide.cpp
   equal.cpp
+  equal_constant.cpp
   erf.cpp
   erfinv.cpp
   exp.cpp
@@ -23,6 +26,10 @@ set_full_path(THIS_DIR_SOURCES
   greater_equal.cpp
   less.cpp
   less_equal.cpp
+  greater_constant.cpp
+  greater_equal_constant.cpp
+  less_constant.cpp
+  less_equal_constant.cpp
   log.cpp
   log1p.cpp
   logical_and.cpp
@@ -31,16 +38,20 @@ set_full_path(THIS_DIR_SOURCES
   logical_xor.cpp
   max.cpp
   min.cpp
+  max_constant.cpp
+  min_constant.cpp
   mod.cpp
   multiply.cpp
   negative.cpp
   not_equal.cpp
+  not_equal_constant.cpp
   pow.cpp
   reciprocal.cpp
   round.cpp
   rsqrt.cpp
   safe_divide.cpp
   safe_reciprocal.cpp
+  scale.cpp
   sign.cpp
   sin.cpp
   sinh.cpp
@@ -48,6 +59,7 @@ set_full_path(THIS_DIR_SOURCES
   square.cpp
   squared_difference.cpp
   subtract.cpp
+  subtract_constant.cpp
   tan.cpp
   tanh.cpp
   )
diff --git a/src/operators/math/cereal_registration/add_constant.cpp b/src/operators/math/cereal_registration/add_constant.cpp
new file mode 100644
index 00000000000..866628954a5
--- /dev/null
+++ b/src/operators/math/cereal_registration/add_constant.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME AddConstantOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/constant_subtract.cpp b/src/operators/math/cereal_registration/constant_subtract.cpp
new file mode 100644
index 00000000000..7e136856548
--- /dev/null
+++ b/src/operators/math/cereal_registration/constant_subtract.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME ConstantSubtractOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/equal_constant.cpp b/src/operators/math/cereal_registration/equal_constant.cpp
new file mode 100644
index 00000000000..025a1cb554d
--- /dev/null
+++ b/src/operators/math/cereal_registration/equal_constant.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME EqualConstantOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/greater_constant.cpp b/src/operators/math/cereal_registration/greater_constant.cpp
new file mode 100644
index 00000000000..82e5bf8b7da
--- /dev/null
+++ b/src/operators/math/cereal_registration/greater_constant.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME GreaterConstantOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/greater_equal_constant.cpp b/src/operators/math/cereal_registration/greater_equal_constant.cpp
new file mode 100644
index 00000000000..377f9c9768e
--- /dev/null
+++ b/src/operators/math/cereal_registration/greater_equal_constant.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME GreaterEqualConstantOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/less_constant.cpp b/src/operators/math/cereal_registration/less_constant.cpp
new file mode 100644
index 00000000000..ca98118c720
--- /dev/null
+++ b/src/operators/math/cereal_registration/less_constant.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME LessConstantOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/less_equal_constant.cpp b/src/operators/math/cereal_registration/less_equal_constant.cpp
new file mode 100644
index 00000000000..9bf3cb0ae31
--- /dev/null
+++ b/src/operators/math/cereal_registration/less_equal_constant.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME LessEqualConstantOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/max_constant.cpp b/src/operators/math/cereal_registration/max_constant.cpp
new file mode 100644
index 00000000000..c0b3d47c5dc
--- /dev/null
+++ b/src/operators/math/cereal_registration/max_constant.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME MaxConstantOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/min_constant.cpp b/src/operators/math/cereal_registration/min_constant.cpp
new file mode 100644
index 00000000000..2980ccc0adb
--- /dev/null
+++ b/src/operators/math/cereal_registration/min_constant.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME MinConstantOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/not_equal_constant.cpp b/src/operators/math/cereal_registration/not_equal_constant.cpp
new file mode 100644
index 00000000000..d7cc4e10dfa
--- /dev/null
+++ b/src/operators/math/cereal_registration/not_equal_constant.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME NotEqualConstantOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/scale.cpp b/src/operators/math/cereal_registration/scale.cpp
new file mode 100644
index 00000000000..18f22a2fb53
--- /dev/null
+++ b/src/operators/math/cereal_registration/scale.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME ScaleOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/cereal_registration/subtract_constant.cpp b/src/operators/math/cereal_registration/subtract_constant.cpp
new file mode 100644
index 00000000000..7b3b4fb75c3
--- /dev/null
+++ b/src/operators/math/cereal_registration/subtract_constant.cpp
@@ -0,0 +1,31 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/operators/math/binary_with_constant.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#define LBANN_OPERATOR_NAME SubtractConstantOperator
+#include <lbann/macros/register_operator_with_cereal.hpp>
diff --git a/src/operators/math/math_builders.cpp b/src/operators/math/math_builders.cpp
index 279e47756c4..e9a74db0b4e 100644
--- a/src/operators/math/math_builders.cpp
+++ b/src/operators/math/math_builders.cpp
@@ -44,25 +44,32 @@ LBANN_ABS_OP_COMPLEX_ETI(El::Device::GPU);
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(acos, T, D);                          \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(acosh, T, D);                         \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(add, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(add_constant, T, D);                  \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(asin, T, D);                          \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(asinh, T, D);                         \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(atan, T, D);                          \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(atanh, T, D);                         \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(ceil, T, D);                          \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(clamp, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(constant_subtract, T, D);             \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(cos, T, D);                           \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(cosh, T, D);                          \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(divide, T, D);                        \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(equal, T, D);                         \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(equal_constant, T, D);                \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(erf, T, D);                           \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(erfinv, T, D);                        \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(exp, T, D);                           \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(expm1, T, D);                         \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(floor, T, D);                         \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(greater, T, D);                       \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(greater_constant, T, D);              \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(greater_equal, T, D);                 \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(greater_equal_constant, T, D);        \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(less, T, D);                          \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(less_constant, T, D);                 \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(less_equal, T, D);                    \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(less_equal_constant, T, D);           \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(log, T, D);                           \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(log1p, T, D);                         \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(logical_and, T, D);                   \
@@ -70,17 +77,21 @@ LBANN_ABS_OP_COMPLEX_ETI(El::Device::GPU);
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(logical_or, T, D);                    \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(logical_xor, T, D);                   \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(max, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(max_constant, T, D);                  \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(min, T, D);                           \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(min_constant, T, D);                  \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(mod, T, D);                           \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(multiply, T, D);                      \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(negative, T, D);                      \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(not_equal, T, D);                     \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(not_equal_constant, T, D);            \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(pow, T, D);                           \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(reciprocal, T, D);                    \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(round, T, D);                         \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(rsqrt, T, D);                         \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(safe_divide, T, D);                   \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(safe_reciprocal, T, D);               \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(scale, T, D);                         \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(sign, T, D);                          \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(sin, T, D);                           \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(sinh, T, D);                          \
@@ -88,6 +99,7 @@ LBANN_ABS_OP_COMPLEX_ETI(El::Device::GPU);
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(square, T, D);                        \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(squared_difference, T, D);            \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(subtract, T, D);                      \
+  LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(subtract_constant, T, D);             \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(tan, T, D);                           \
   LBANN_SINGLE_TYPE_OPERATOR_BUILDER_ETI(tanh, T, D)
 
diff --git a/src/operators/math/unit_test/CMakeLists.txt b/src/operators/math/unit_test/CMakeLists.txt
index 2bc416c3865..a312ae03011 100644
--- a/src/operators/math/unit_test/CMakeLists.txt
+++ b/src/operators/math/unit_test/CMakeLists.txt
@@ -1,11 +1,17 @@
 set_full_path(THIS_DIR_MPI_CATCH2_TEST_FILES
   abs_test.cpp
   add_test.cpp
+  add_constant_test.cpp
   clamp_test.cpp
+  constant_subtract_test.cpp
   cos_test.cpp
+  equal_constant_test.cpp
   multiply_test.cpp
+  not_equal_constant_test.cpp
+  scale_test.cpp
   sin_test.cpp
   subtract_test.cpp
+  subtract_constant_test.cpp
   )
 
 set(LBANN_MPI_CATCH2_TEST_FILES
diff --git a/src/operators/math/unit_test/add_constant_test.cpp b/src/operators/math/unit_test/add_constant_test.cpp
new file mode 100644
index 00000000000..ab7c9a912fb
--- /dev/null
+++ b/src/operators/math/unit_test/add_constant_test.cpp
@@ -0,0 +1,318 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/binary_with_constant.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+using namespace lbann;
+
+// Define the list of operators to test: {float,double} x {CPU,GPU}.
+// The GPU variants are only listed when LBANN_HAS_GPU is defined.
+template <typename T>
+using AddConstantOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  AddConstantOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  AddConstantOperator<T, El::Device::CPU>>;
+// Per-device lists for float and double, appended into a single type list.
+using AllAddConstantOpTypes =
+  h2::meta::tlist::Append<AddConstantOperatorAllDevices<float>,
+                          AddConstantOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<AddConstantOperator<T, D>>
+  : OperatorTraits<Operator<T, T, D>> // reuse Operator<T, T, D>'s traits
+{
+};
+} // namespace lbann
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("AddConstant operator lifecycle",
+                        "[mpi][operator][math][addconstant][lifecycle]",
+                        AllAddConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using DataT = InputValueType<ThisOpType>;
+  // Non-zero constants ensure a dropped value cannot pass as the default.
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr;
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(0));
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>(El::To<DataT>(13)));
+    REQUIRE(IsValidPtr(op_ptr));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(13));
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{El::To<DataT>(13)}.clone());
+    REQUIRE(clone_ptr->get_constant() == El::To<DataT>(13));
+
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+    REQUIRE(op.get_constant() == El::To<DataT>(13));
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+
+    lbann_data::Operator proto_op;
+    ThisOpType{El::To<DataT>(13.f)}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    REQUIRE(IsValidPtr(base_ptr)); // must hold: dereferenced below
+    CHECK(base_ptr->get_type() == "add constant");
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    REQUIRE(IsValidPtr(specific_ptr)); // must hold: dereferenced below
+    CHECK(specific_ptr->get_constant() == El::To<DataT>(13));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("AddConstant operator action",
+                        "[mpi][operator][math][addconstant][action]",
+                        AllAddConstantOpTypes)
+{
+  using OpT = TestType;
+  using ValueT = InputValueType<OpT>;
+  // Forward pass must add 13 to every entry; backward pass must hand the
+  // incoming gradient through unchanged.
+  auto const& grid =
+    unit_test::utilities::current_world_comm().get_trainer_grid();
+
+  OpT add_op(El::To<ValueT>(13));
+
+  El::Int const num_rows = 13;
+  El::Int const num_cols = 17;
+
+  SECTION("Data parallel")
+  {
+    using InMat = InputDataParallelMatType<OpT>;
+    using OutMat = OutputDataParallelMatType<OpT>;
+    InMat x(num_rows, num_cols, grid, 0), dx(num_rows, num_cols, grid, 0),
+      dx_expected(num_rows, num_cols, grid, 0);
+    OutMat y(num_rows, num_cols, grid, 0), dy(num_rows, num_cols, grid, 0),
+      y_expected(num_rows, num_cols, grid, 0);
+
+    // Known input and the output it must produce (1 + 13 = 14).
+    El::Fill(x, El::To<ValueT>(1.));
+    El::Fill(y_expected, El::To<ValueT>(14.));
+    // Upstream gradient is random; the input gradient must equal it.
+    El::MakeUniform(dy);
+    dx_expected = dy;
+
+    // Poison the results so stale values cannot satisfy the checks.
+    El::Fill(y, El::To<ValueT>(-32.));
+    El::Fill(dx, El::To<ValueT>(-42.));
+
+    CHECK_FALSE(y_expected == y);
+    REQUIRE_NOTHROW(add_op.fp_compute({x}, {y}));
+    CHECK(y_expected == y);
+
+    REQUIRE_NOTHROW(add_op.bp_compute({x}, {dy}, {dx}));
+    CHECK(dx_expected == dx);
+  }
+
+  SECTION("Model parallel")
+  {
+    using InMat = InputModelParallelMatType<OpT>;
+    using OutMat = OutputModelParallelMatType<OpT>;
+    InMat x(num_rows, num_cols, grid, 0), dx(num_rows, num_cols, grid, 0),
+      dx_expected(num_rows, num_cols, grid, 0);
+    OutMat y(num_rows, num_cols, grid, 0), dy(num_rows, num_cols, grid, 0),
+      y_expected(num_rows, num_cols, grid, 0);
+
+    // Known input and the output it must produce (3 + 13 = 16).
+    El::Fill(x, El::To<ValueT>(3.));
+    El::Fill(y_expected, El::To<ValueT>(16.));
+    // Upstream gradient is random; the input gradient must equal it.
+    El::MakeUniform(dy);
+    dx_expected = dy;
+
+    // Poison the results so stale values cannot satisfy the checks.
+    El::Fill(y, El::To<ValueT>(-32.));
+    El::Fill(dx, El::To<ValueT>(-52.));
+
+    CHECK_FALSE(y_expected == y);
+    REQUIRE_NOTHROW(add_op.fp_compute({x}, {y}));
+    CHECK(y_expected == y);
+
+    REQUIRE_NOTHROW(add_op.bp_compute({x}, {dy}, {dx}));
+    CHECK(dx_expected == dx);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("AddConstant operator serialization",
+                        "[mpi][operator][math][addconstant][serialize]",
+                        AllAddConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+  using InOutDataType = InputValueType<ThisOpType>;
+  // Round-trip an operator by value and by base pointer via each archive.
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss;
+
+  // Sources hold distinct constants (12 and 1) so mixed-up reads are caught.
+  ThisOpType src_operator{El::To<InOutDataType>(12.)}, tgt_operator;
+  BaseOpPtr src_operator_ptr =
+              std::make_unique<ThisOpType>(El::To<InOutDataType>(1.)),
+            tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr)); // must hold: dereferenced below
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr)); // must hold: dereferenced below
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr)); // must hold: dereferenced below
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr serialization.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr)); // must hold: dereferenced below
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/operators/math/unit_test/constant_subtract_test.cpp b/src/operators/math/unit_test/constant_subtract_test.cpp
new file mode 100644
index 00000000000..f40f975808c
--- /dev/null
+++ b/src/operators/math/unit_test/constant_subtract_test.cpp
@@ -0,0 +1,320 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/binary_with_constant.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+using namespace lbann;
+
+// Define the list of operators to test: {float,double} x {CPU,GPU}.
+// The GPU variants are only listed when LBANN_HAS_GPU is defined.
+template <typename T>
+using ConstantSubtractOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  ConstantSubtractOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  ConstantSubtractOperator<T, El::Device::CPU>>;
+// Per-device lists for float and double, appended into a single type list.
+using AllConstantSubtractOpTypes =
+  h2::meta::tlist::Append<ConstantSubtractOperatorAllDevices<float>,
+                          ConstantSubtractOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<ConstantSubtractOperator<T, D>>
+  : OperatorTraits<Operator<T, T, D>> // reuse Operator<T, T, D>'s traits
+{
+};
+} // namespace lbann
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("ConstantSubtract operator lifecycle",
+                        "[mpi][operator][math][constantsubtract][lifecycle]",
+                        AllConstantSubtractOpTypes)
+{
+  using ThisOpType = TestType;
+  using DataT = InputValueType<ThisOpType>;
+  // Non-zero constants ensure a dropped value cannot pass as the default.
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr;
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(0));
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>(El::To<DataT>(13)));
+    REQUIRE(IsValidPtr(op_ptr));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(13));
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{El::To<DataT>(13)}.clone());
+    REQUIRE(clone_ptr->get_constant() == El::To<DataT>(13));
+
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+    REQUIRE(op.get_constant() == El::To<DataT>(13));
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+
+    lbann_data::Operator proto_op;
+    ThisOpType{El::To<DataT>(13.f)}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    REQUIRE(IsValidPtr(base_ptr)); // must hold: dereferenced below
+    CHECK(base_ptr->get_type() == "subtract from constant");
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    REQUIRE(IsValidPtr(specific_ptr)); // must hold: dereferenced below
+    CHECK(specific_ptr->get_constant() == El::To<DataT>(13));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("ConstantSubtract operator action",
+                        "[mpi][operator][math][constantsubtract][action]",
+                        AllConstantSubtractOpTypes)
+{
+  using OpT = TestType;
+  using ValueT = InputValueType<OpT>;
+  // Forward pass must yield 13 minus each entry; backward pass must negate
+  // the incoming gradient.
+  auto const& grid =
+    unit_test::utilities::current_world_comm().get_trainer_grid();
+
+  OpT subtract_op(El::To<ValueT>(13));
+
+  El::Int const num_rows = 13;
+  El::Int const num_cols = 17;
+
+  SECTION("Data parallel")
+  {
+    using InMat = InputDataParallelMatType<OpT>;
+    using OutMat = OutputDataParallelMatType<OpT>;
+    InMat x(num_rows, num_cols, grid, 0), dx(num_rows, num_cols, grid, 0),
+      dx_expected(num_rows, num_cols, grid, 0);
+    OutMat y(num_rows, num_cols, grid, 0), dy(num_rows, num_cols, grid, 0),
+      y_expected(num_rows, num_cols, grid, 0);
+
+    // Known input and the output it must produce (13 - 6 = 7).
+    El::Fill(x, El::To<ValueT>(6.));
+    El::Fill(y_expected, El::To<ValueT>(7.));
+    // Upstream gradient is random; the input gradient must be its negation.
+    El::MakeUniform(dy);
+    dx_expected = dy;
+    El::Scale(-1, dx_expected);
+
+    // Poison the results so stale values cannot satisfy the checks.
+    El::Fill(y, El::To<ValueT>(-32.));
+    El::Fill(dx, El::To<ValueT>(-42.));
+
+    CHECK_FALSE(y_expected == y);
+    REQUIRE_NOTHROW(subtract_op.fp_compute({x}, {y}));
+    CHECK(y_expected == y);
+
+    REQUIRE_NOTHROW(subtract_op.bp_compute({x}, {dy}, {dx}));
+    CHECK(dx_expected == dx);
+  }
+
+  SECTION("Model parallel")
+  {
+    using InMat = InputModelParallelMatType<OpT>;
+    using OutMat = OutputModelParallelMatType<OpT>;
+    InMat x(num_rows, num_cols, grid, 0), dx(num_rows, num_cols, grid, 0),
+      dx_expected(num_rows, num_cols, grid, 0);
+    OutMat y(num_rows, num_cols, grid, 0), dy(num_rows, num_cols, grid, 0),
+      y_expected(num_rows, num_cols, grid, 0);
+
+    // Known input and the output it must produce (13 - 16 = -3).
+    El::Fill(x, El::To<ValueT>(16.));
+    El::Fill(y_expected, El::To<ValueT>(-3.));
+    // Upstream gradient is random; the input gradient must be its negation.
+    El::MakeUniform(dy);
+    dx_expected = dy;
+    El::Scale(-1, dx_expected);
+
+    // Poison the results so stale values cannot satisfy the checks.
+    El::Fill(y, El::To<ValueT>(-32.));
+    El::Fill(dx, El::To<ValueT>(-52.));
+
+    CHECK_FALSE(y_expected == y);
+    REQUIRE_NOTHROW(subtract_op.fp_compute({x}, {y}));
+    CHECK(y_expected == y);
+
+    REQUIRE_NOTHROW(subtract_op.bp_compute({x}, {dy}, {dx}));
+    CHECK(dx_expected == dx);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("ConstantSubtract operator serialization",
+                        "[mpi][operator][math][constantsubtract][serialize]",
+                        AllConstantSubtractOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+  using InOutDataType = InputValueType<ThisOpType>;
+  // Round-trip the operator, by value and by base pointer, per archive type.
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss; // written, then read back, by each section
+
+  // Sources hold distinct constants (12 by value, 1 behind the base pointer).
+  ThisOpType src_operator{El::To<InOutDataType>(12.)}, tgt_operator;
+  BaseOpPtr src_operator_ptr =
+              std::make_unique<ThisOpType>(El::To<InOutDataType>(1.)),
+            tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/operators/math/unit_test/equal_constant_test.cpp b/src/operators/math/unit_test/equal_constant_test.cpp
new file mode 100644
index 00000000000..a45b6be043b
--- /dev/null
+++ b/src/operators/math/unit_test/equal_constant_test.cpp
@@ -0,0 +1,379 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/binary_with_constant.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <matrices.hpp>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+using namespace lbann;
+
+// Operator types under test: {float,double} x {CPU, plus GPU when
+// LBANN_HAS_GPU is defined}. The test cases below run once per listed type.
+template <typename T>
+using EqualConstantOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  EqualConstantOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  EqualConstantOperator<T, El::Device::CPU>>;
+
+using AllEqualConstantOpTypes =
+  h2::meta::tlist::Append<EqualConstantOperatorAllDevices<float>,
+                          EqualConstantOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<EqualConstantOperator<T, D>>
+  : OperatorTraits<Operator<T, T, D>> // reuse the Operator<T, T, D> traits
+{
+}; // adds no members of its own
+} // namespace lbann
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("EqualConstant operator lifecycle",
+                        "[mpi][operator][math][equalconstant][lifecycle]",
+                        AllEqualConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using DataT = InputValueType<ThisOpType>;
+  // Covers construction, clone/copy-assignment, and protobuf construction.
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr; // default, then explicit
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(0));
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>(El::To<DataT>(7)));
+    REQUIRE(IsValidPtr(op_ptr));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(7));
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{El::To<DataT>(5)}.clone());
+    REQUIRE(clone_ptr->get_constant() == El::To<DataT>(5));
+    // `op` starts at the default constant, so a match below proves the copy.
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+    REQUIRE(op.get_constant() == El::To<DataT>(5));
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+    // Write an operator with constant 13 to a message, then rebuild from it.
+    lbann_data::Operator proto_op;
+    ThisOpType{El::To<DataT>(13.f)}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    REQUIRE(IsValidPtr(base_ptr)); // dereferenced on the next line
+    CHECK(base_ptr->get_type() == "equals constant");
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    REQUIRE(IsValidPtr(specific_ptr)); // abort rather than dereference null
+    CHECK(specific_ptr->get_constant() == El::To<DataT>(13));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("EqualConstant operator action",
+                        "[mpi][operator][math][equalconstant][action]",
+                        AllEqualConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using InOutDataType = InputValueType<ThisOpType>;
+  // Checks fp_compute/bp_compute results against hand-built expectations.
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+
+  // Operator under test, constructed with the constant 13.
+  ThisOpType op(El::To<InOutDataType>(13));
+
+  El::Int const height = 13;
+  El::Int const width = 17;
+
+  SECTION("Data parallel - all equal")
+  {
+    InputDataParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Every input entry is 13 (the constant), so every output should be 1.
+    El::Fill(input, El::To<InOutDataType>(13.));
+    El::Fill(true_output, El::To<InOutDataType>(1.));
+    // Expected input gradient is all zeros, whatever grad_wrt_output holds.
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output;
+    El::Zero(true_grad_wrt_input);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Data parallel - none equal")
+  {
+    InputDataParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Uniform random input; the test expects no entry to match, so all 0.
+    El::MakeUniform(input);
+    El::Zero(true_output);
+    // Expected input gradient is all zeros, whatever grad_wrt_output holds.
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output;
+    El::Zero(true_grad_wrt_input);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Model parallel - all equal")
+  {
+    InputModelParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Every input entry is 13 (the constant), so every output should be 1.
+    El::Fill(input, El::To<InOutDataType>(13.));
+    El::Fill(true_output, El::To<InOutDataType>(1.));
+    // Expected input gradient is all zeros, whatever grad_wrt_output holds.
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output;
+    El::Zero(true_grad_wrt_input);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-52.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Model parallel - none equal")
+  {
+    InputModelParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Uniform random input; the test expects no entry to match, so all 0.
+    El::MakeUniform(input);
+    El::Zero(true_output);
+    // Expected input gradient is all zeros, whatever grad_wrt_output holds.
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output;
+    El::Zero(true_grad_wrt_input);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-52.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("EqualConstant operator serialization",
+                        "[mpi][operator][math][equalconstant][serialize]",
+                        AllEqualConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+  using InOutDataType = InputValueType<ThisOpType>;
+  // Round-trip the operator, by value and by base pointer, per archive type.
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss; // written, then read back, by each section
+
+  // Sources hold distinct constants (12 by value, 1 behind the base pointer).
+  ThisOpType src_operator{El::To<InOutDataType>(12.)}, tgt_operator;
+  BaseOpPtr src_operator_ptr =
+              std::make_unique<ThisOpType>(El::To<InOutDataType>(1.)),
+            tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/operators/math/unit_test/not_equal_constant_test.cpp b/src/operators/math/unit_test/not_equal_constant_test.cpp
new file mode 100644
index 00000000000..b602bd20b75
--- /dev/null
+++ b/src/operators/math/unit_test/not_equal_constant_test.cpp
@@ -0,0 +1,379 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/binary_with_constant.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <matrices.hpp>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+using namespace lbann;
+
+// Operator types under test: {float,double} x {CPU, plus GPU when
+// LBANN_HAS_GPU is defined}. The test cases below run once per listed type.
+template <typename T>
+using NotEqualConstantOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  NotEqualConstantOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  NotEqualConstantOperator<T, El::Device::CPU>>;
+
+using AllNotEqualConstantOpTypes =
+  h2::meta::tlist::Append<NotEqualConstantOperatorAllDevices<float>,
+                          NotEqualConstantOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<NotEqualConstantOperator<T, D>>
+  : OperatorTraits<Operator<T, T, D>> // reuse the Operator<T, T, D> traits
+{
+}; // adds no members of its own
+} // namespace lbann
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("NotEqualConstant operator lifecycle",
+                        "[mpi][operator][math][notequalconstant][lifecycle]",
+                        AllNotEqualConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using DataT = InputValueType<ThisOpType>;
+  // Covers construction, clone/copy-assignment, and protobuf construction.
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr; // default, then explicit
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(0));
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>(El::To<DataT>(7)));
+    REQUIRE(IsValidPtr(op_ptr));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(7));
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{El::To<DataT>(5)}.clone());
+    REQUIRE(clone_ptr->get_constant() == El::To<DataT>(5));
+    // `op` starts at the default constant, so a match below proves the copy.
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+    REQUIRE(op.get_constant() == El::To<DataT>(5));
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+    // Write an operator with constant 13 to a message, then rebuild from it.
+    lbann_data::Operator proto_op;
+    ThisOpType{El::To<DataT>(13.f)}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    REQUIRE(IsValidPtr(base_ptr)); // dereferenced on the next line
+    CHECK(base_ptr->get_type() == "not equals constant");
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    REQUIRE(IsValidPtr(specific_ptr)); // abort rather than dereference null
+    CHECK(specific_ptr->get_constant() == El::To<DataT>(13));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("NotEqualConstant operator action",
+                        "[mpi][operator][math][notequalconstant][action]",
+                        AllNotEqualConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using InOutDataType = InputValueType<ThisOpType>;
+  // Checks fp_compute/bp_compute results against hand-built expectations.
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+
+  // Operator under test, constructed with the constant 13.
+  ThisOpType op(El::To<InOutDataType>(13));
+
+  El::Int const height = 13;
+  El::Int const width = 17;
+
+  SECTION("Data parallel - all equal")
+  {
+    InputDataParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Every input entry is 13 (the constant), so every output should be 0.
+    El::Fill(input, El::To<InOutDataType>(13.));
+    El::Zero(true_output);
+    // Expected input gradient is all zeros, whatever grad_wrt_output holds.
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output;
+    El::Zero(true_grad_wrt_input);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Data parallel - none equal")
+  {
+    InputDataParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Uniform random input; the test expects no entry to match, so all 1.
+    El::MakeUniform(input);
+    El::Fill(true_output, El::To<InOutDataType>(1.));
+    // Expected input gradient is all zeros, whatever grad_wrt_output holds.
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output;
+    El::Zero(true_grad_wrt_input);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-42.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Model parallel - all equal")
+  {
+    InputModelParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Every input entry is 13 (the constant), so every output should be 0.
+    El::Fill(input, El::To<InOutDataType>(13.));
+    El::Zero(true_output);
+    // Expected input gradient is all zeros, whatever grad_wrt_output holds.
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output;
+    El::Zero(true_grad_wrt_input);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-52.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Model parallel - none equal")
+  {
+    InputModelParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Uniform random input; the test expects no entry to match, so all 1.
+    El::MakeUniform(input);
+    El::Fill(true_output, El::To<InOutDataType>(1.));
+    // Expected input gradient is all zeros, whatever grad_wrt_output holds.
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output;
+    El::Zero(true_grad_wrt_input);
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Fill out of range.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-52.)); // Fill out of range.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("NotEqualConstant operator serialization",
+                        "[mpi][operator][math][notequalconstant][serialize]",
+                        AllNotEqualConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+  using InOutDataType = InputValueType<ThisOpType>;
+  // Round-trip the operator, by value and by base pointer, per archive type.
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss; // written, then read back, by each section
+
+  // Sources hold distinct constants (12 by value, 1 behind the base pointer).
+  ThisOpType src_operator{El::To<InOutDataType>(12.)}, tgt_operator;
+  BaseOpPtr src_operator_ptr =
+              std::make_unique<ThisOpType>(El::To<InOutDataType>(1.)),
+            tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // The downcast must succeed before concrete_ptr is dereferenced below.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/operators/math/unit_test/scale_test.cpp b/src/operators/math/unit_test/scale_test.cpp
new file mode 100644
index 00000000000..c5dc8e26a6c
--- /dev/null
+++ b/src/operators/math/unit_test/scale_test.cpp
@@ -0,0 +1,319 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/binary_with_constant.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+using namespace lbann;
+
+// Define the list of operators to test. Basically this is
+// {float,double}x{CPU,GPU}.
+template <typename T>
+using ScaleOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  ScaleOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  ScaleOperator<T, El::Device::CPU>>;
+
+using AllScaleOpTypes =
+  h2::meta::tlist::Append<ScaleOperatorAllDevices<float>,
+                          ScaleOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<ScaleOperator<T, D>> : OperatorTraits<Operator<T, T, D>>
+{
+};
+} // namespace lbann
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("Scale operator lifecycle",
+                        "[mpi][operator][math][scale][lifecycle]",
+                        AllScaleOpTypes)
+{
+  using ThisOpType = TestType;
+  using DataT = InputValueType<ThisOpType>;
+
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr;
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(0)); // Default constant.
+
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>(El::To<DataT>(13)));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(13)); // Explicit constant.
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{}.clone());
+    REQUIRE(clone_ptr->get_constant() == El::To<DataT>(0));
+
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+    REQUIRE(op.get_constant() == El::To<DataT>(0));
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+
+    lbann_data::Operator proto_op;
+    ThisOpType{El::To<DataT>(13.f)}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    CHECK(base_ptr->get_type() == "scale");
+
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    REQUIRE(IsValidPtr(specific_ptr)); // Abort here; dereferenced below.
+    CHECK(specific_ptr->get_constant() == El::To<DataT>(13));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Scale operator action",
+                        "[mpi][operator][math][scale][action]",
+                        AllScaleOpTypes)
+{
+  using ThisOpType = TestType;
+  using InOutDataType = InputValueType<ThisOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+
+  // Operator under test, built with constant 13; shared by both sections.
+  ThisOpType op(El::To<InOutDataType>(13));
+
+  El::Int const height = 13;
+  El::Int const width = 17;
+
+  SECTION("Data parallel")
+  {
+    InputDataParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Set up the input and the expected forward output (13 * 2 = 26).
+    El::Fill(input, El::To<InOutDataType>(2.));
+    El::Fill(true_output, El::To<InOutDataType>(26.));
+
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output;
+    El::Scale(op.get_constant(), true_grad_wrt_input); // Expected gradient.
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Sentinel value.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-42.)); // Sentinel value.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Model parallel")
+  {
+    InputModelParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Set up the input and the expected forward output (13 * 1 = 13).
+    El::Fill(input, El::To<InOutDataType>(1.));
+    El::Fill(true_output, El::To<InOutDataType>(13.));
+
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output;
+    El::Scale(op.get_constant(), true_grad_wrt_input); // Expected gradient.
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Sentinel value.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-52.)); // Sentinel value.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("Scale operator serialization",
+                        "[mpi][operator][math][scale][serialize]",
+                        AllScaleOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+  using InOutDataType = InputValueType<ThisOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss;
+
+  // Sources hold constants 12 (by value) and 1 (by base pointer).
+  ThisOpType src_operator{El::To<InOutDataType>(12.)}, tgt_operator;
+  BaseOpPtr src_operator_ptr =
+              std::make_unique<ThisOpType>(El::To<InOutDataType>(1.)),
+            tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr round trip; REQUIRE guards the dereference.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr round trip; REQUIRE guards the dereference.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr round trip; REQUIRE guards the dereference.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr round trip; REQUIRE guards the dereference.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/operators/math/unit_test/subtract_constant_test.cpp b/src/operators/math/unit_test/subtract_constant_test.cpp
new file mode 100644
index 00000000000..99e86bdcee4
--- /dev/null
+++ b/src/operators/math/unit_test/subtract_constant_test.cpp
@@ -0,0 +1,318 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+// Testing framework stuff
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "MatrixHelpers.hpp"
+#include "TestHelpers.hpp"
+
+#include "OperatorTraits.hpp"
+
+// CUT
+#include "lbann/operators/math/binary_with_constant.hpp"
+
+// Other stuff
+#include "lbann/proto/factories.hpp"
+#include "lbann/utils/serialize.hpp"
+
+#include <h2/meta/Core.hpp>
+#include <h2/meta/TypeList.hpp>
+
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <operators.pb.h>
+
+using namespace lbann;
+
+// Define the list of operators to test. Basically this is
+// {float,double}x{CPU,GPU}.
+template <typename T>
+using SubtractConstantOperatorAllDevices = h2::meta::TL<
+#ifdef LBANN_HAS_GPU
+  SubtractConstantOperator<T, El::Device::GPU>,
+#endif // LBANN_HAS_GPU
+  SubtractConstantOperator<T, El::Device::CPU>>;
+
+using AllSubtractConstantOpTypes =
+  h2::meta::tlist::Append<SubtractConstantOperatorAllDevices<float>,
+                          SubtractConstantOperatorAllDevices<double>>;
+
+namespace lbann {
+template <typename T, El::Device D>
+struct OperatorTraits<SubtractConstantOperator<T, D>>
+  : OperatorTraits<Operator<T, T, D>>
+{
+};
+} // namespace lbann
+
+// Save some typing.
+using unit_test::utilities::IsValidPtr;
+
+TEMPLATE_LIST_TEST_CASE("SubtractConstant operator lifecycle",
+                        "[mpi][operator][math][subtractconstant][lifecycle]",
+                        AllSubtractConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using DataT = InputValueType<ThisOpType>;
+
+  SECTION("Construction with valid arguments")
+  {
+    std::unique_ptr<ThisOpType> op_ptr = nullptr;
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>());
+    REQUIRE(IsValidPtr(op_ptr));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(0)); // Default constant.
+
+    REQUIRE_NOTHROW(op_ptr = std::make_unique<ThisOpType>(El::To<DataT>(13)));
+    REQUIRE(op_ptr->get_constant() == El::To<DataT>(13)); // Explicit constant.
+  }
+  SECTION("Copy interface")
+  {
+    std::unique_ptr<ThisOpType> clone_ptr = nullptr;
+    REQUIRE_NOTHROW(clone_ptr = ThisOpType{}.clone());
+    REQUIRE(clone_ptr->get_constant() == El::To<DataT>(0));
+
+    ThisOpType op;
+    REQUIRE_NOTHROW(op = *clone_ptr);
+    REQUIRE(op.get_constant() == El::To<DataT>(0));
+  }
+  SECTION("Construct from protobuf")
+  {
+    constexpr auto D = Device<ThisOpType>;
+    using InT = InputValueType<ThisOpType>;
+    using OutT = OutputValueType<ThisOpType>;
+
+    lbann_data::Operator proto_op;
+    ThisOpType{El::To<DataT>(13.f)}.write_proto(proto_op);
+
+    std::unique_ptr<BaseOperatorType<ThisOpType>> base_ptr = nullptr;
+    REQUIRE_NOTHROW(base_ptr =
+                      proto::construct_operator<InT, OutT, D>(proto_op));
+    CHECK(base_ptr->get_type() == "subtract constant");
+
+    auto* specific_ptr = dynamic_cast<ThisOpType*>(base_ptr.get());
+    REQUIRE(IsValidPtr(specific_ptr)); // Abort here; dereferenced below.
+    CHECK(specific_ptr->get_constant() == El::To<DataT>(13));
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("SubtractConstant operator action",
+                        "[mpi][operator][math][subtractconstant][action]",
+                        AllSubtractConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using InOutDataType = InputValueType<ThisOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+
+  // Operator under test, built with constant 13; shared by both sections.
+  ThisOpType op(El::To<InOutDataType>(13));
+
+  El::Int const height = 13;
+  El::Int const width = 17;
+
+  SECTION("Data parallel")
+  {
+    InputDataParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputDataParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Set up the input and the expected forward output (14 - 13 = 1).
+    El::Fill(input, El::To<InOutDataType>(14.));
+    El::Fill(true_output, El::To<InOutDataType>(1.));
+
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output; // Gradient expected unchanged.
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Sentinel value.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-42.)); // Sentinel value.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+
+  SECTION("Model parallel")
+  {
+    InputModelParallelMatType<ThisOpType> input(height, width, g, 0),
+      grad_wrt_input(height, width, g, 0),
+      true_grad_wrt_input(height, width, g, 0);
+    OutputModelParallelMatType<ThisOpType> output(height, width, g, 0),
+      grad_wrt_output(height, width, g, 0), true_output(height, width, g, 0);
+
+    // Set up the input and the expected forward output (16 - 13 = 3).
+    El::Fill(input, El::To<InOutDataType>(16.));
+    El::Fill(true_output, El::To<InOutDataType>(3.));
+
+    El::MakeUniform(grad_wrt_output);
+    true_grad_wrt_input = grad_wrt_output; // Gradient expected unchanged.
+
+    El::Fill(output, El::To<InOutDataType>(-32.)); // Sentinel value.
+    El::Fill(grad_wrt_input,
+             El::To<InOutDataType>(-52.)); // Sentinel value.
+
+    CHECK_FALSE(true_output == output);
+    REQUIRE_NOTHROW(op.fp_compute({input}, {output}));
+    CHECK(true_output == output);
+
+    REQUIRE_NOTHROW(
+      op.bp_compute({input}, {grad_wrt_output}, {grad_wrt_input}));
+    CHECK(true_grad_wrt_input == grad_wrt_input);
+  }
+}
+
+TEMPLATE_LIST_TEST_CASE("SubtractConstant operator serialization",
+                        "[mpi][operator][math][subtractconstant][serialize]",
+                        AllSubtractConstantOpTypes)
+{
+  using ThisOpType = TestType;
+  using BaseOpType = BaseOperatorType<ThisOpType>;
+  using BaseOpPtr = std::unique_ptr<BaseOpType>;
+  using InOutDataType = InputValueType<ThisOpType>;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+
+  auto const& g = world_comm.get_trainer_grid();
+  utils::grid_manager mgr(g);
+
+  std::stringstream ss;
+
+  // Sources hold constants 12 (by value) and 1 (by base pointer).
+  ThisOpType src_operator{El::To<InOutDataType>(12.)}, tgt_operator;
+  BaseOpPtr src_operator_ptr =
+              std::make_unique<ThisOpType>(El::To<InOutDataType>(1.)),
+            tgt_operator_ptr;
+
+#ifdef LBANN_HAS_CEREAL_BINARY_ARCHIVES
+  SECTION("Binary archive")
+  {
+    {
+      cereal::BinaryOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::BinaryInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr round trip; REQUIRE guards the dereference.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted binary archive")
+  {
+    {
+      RootedBinaryOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedBinaryInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr round trip; REQUIRE guards the dereference.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_BINARY_ARCHIVES
+
+#ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
+  SECTION("XML archive")
+  {
+    {
+      cereal::XMLOutputArchive oarchive(ss);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      cereal::XMLInputArchive iarchive(ss);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr round trip; REQUIRE guards the dereference.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+
+  SECTION("Rooted XML archive")
+  {
+    {
+      RootedXMLOutputArchive oarchive(ss, g);
+      REQUIRE_NOTHROW(oarchive(src_operator));
+      REQUIRE_NOTHROW(oarchive(src_operator_ptr));
+    }
+
+    {
+      RootedXMLInputArchive iarchive(ss, g);
+      REQUIRE_NOTHROW(iarchive(tgt_operator));
+      REQUIRE_NOTHROW(iarchive(tgt_operator_ptr));
+      CHECK(IsValidPtr(tgt_operator_ptr));
+    }
+
+    // Check the by-base-ptr round trip; REQUIRE guards the dereference.
+    auto const* concrete_ptr =
+      dynamic_cast<ThisOpType const*>(tgt_operator_ptr.get());
+    REQUIRE(IsValidPtr(tgt_operator_ptr));
+    REQUIRE(IsValidPtr(concrete_ptr));
+    CHECK(concrete_ptr->get_constant() == El::To<InOutDataType>(1.));
+    CHECK(tgt_operator.get_constant() == El::To<InOutDataType>(12.));
+  }
+#endif // LBANN_HAS_CEREAL_XML_ARCHIVES
+}
diff --git a/src/proto/operators.proto b/src/proto/operators.proto
index 03e80ed0c40..b7d5d526f40 100644
--- a/src/proto/operators.proto
+++ b/src/proto/operators.proto
@@ -103,6 +103,46 @@ message LogicalAndOperator {}
 message LogicalOrOperator {}
 message LogicalXorOperator {}
 
+/// @}
+/** @name Binary-with-constant Operators */
+/// @{
+message AddConstantOperator {
+  double constant = 1;
+}
+message ScaleOperator {
+  double constant = 1;
+}
+message SubtractConstantOperator {
+  double constant = 1;
+}
+message ConstantSubtractOperator {
+  double constant = 1;
+}
+message MaxConstantOperator {
+  double constant = 1;
+}
+message MinConstantOperator {
+  double constant = 1;
+}
+message NotEqualConstantOperator {
+  double constant = 1;
+}
+message EqualConstantOperator {
+  double constant = 1;
+}
+message LessEqualConstantOperator {
+  double constant = 1;
+}
+message LessConstantOperator {
+  double constant = 1;
+}
+message GreaterEqualConstantOperator {
+  double constant = 1;
+}
+message GreaterConstantOperator {
+  double constant = 1;
+}
+
 /// @}
 /** @name Activation Operators */
 /// @{

From c4641269f23760a12b9b56e0dd30a60e0f28f259 Mon Sep 17 00:00:00 2001
From: Brian Van Essen <vanessen1@llnl.gov>
Date: Tue, 14 Sep 2021 14:06:32 -0700
Subject: [PATCH 03/37] Enable data reader input layer fields (#1951)

* Changed the input_layer to only fetch data for a specific field that
is set by the user during model construction.  Each instance of the
input layer is configured using the data_field parameter in the PFE.
Input layers are now single output only, and no longer support the
target_mode feature.  For the initial commit the only valid values of
the data_field parameter are: datum, label, responses.

Updated the identity and argmax unit tests to use the new input layer
format.

* Updated all unit tests with empty input layers to fetch the datum
field.

* Fixed the input layer so that it will return the proper dimensions
based on the data field string provided.  Removed the
data_reader_mode_field from the input layer.

* Updated test models and unit tests to use multiple input layers for
the sample datum and labels.

* Updated the reconstruction test to use data on the VAST file system
rather than Lustre.

* Updated the application models to use the new input layer format.
Note that this does not change how the data is ingested, e.g. it does
not pull in multiple fields to avoid splits and slices, but rather
just using the explicit: datum, labels, responses fields.

* Changed from using the data field name datum to samples.

* Fixed the integration tests to also use the samples field name rather
than the datum field name.

* Changed the interaction between the data coordinator and input layers,
so that the input layer will register each data field that is active
in a model.  The data coordinator switches the setup of the data
buffers from the setup() function to the setup_data_fields() function
that is called after the model's setup() function.  This restructuring
is necessary to accommodate arbitrary, model defined data field names
without having to fetch every possible field of a sample.

* Added two unit tests for the synthetic data reader, one using the
public API and one directly accessing the internal APIs (fetch_datum,
fetch_label, fetch_response).

Fixed a bug in the synthetic data reader where the flags for having
labels or responses were not properly set during construction.

Added temporary debugging code to the synthetic data reader to track
the random values being generated.

* Updated the internals of the data reader class structure so that the
I/O threading is hoisted out of the fetch_data function and into
primary fetch function.  The fetch function now directly calls the
fetch_data_block function, and the fetch_data_block function then
calls the data reader specific fetch_datum, fetch_label, and
fetch_response functions based on what data fields are active.  This
ensures that labels and response values are fetched from the I/O
thread pool, rather than just the primary I/O thread.  This also
removes the fetch_data, fetch_labels, and fetch_responses as
unnecessary levels of the function hierarchy.

As a result of these changes the JAG data reader specialization of the
fetch_data, _labels, and _responses are removed, as well as the reuse
functions.

Note that the updated python data reader only supports filling in the
samples data field.

* Added test file for the public API of the data reader.

* Updated the synthetic test to sweep the size of the data sample and response.

* Fixed a bug in how the synthetic data reader tests were using normal
distributions with odd numbers of values.  Improved the tests to work
with multiple numbers of samples, dimensions, and number of labels.

* Fixed a bug in the thread pool destructor that was causing issues in
catch testing the data readers.  Thread pool destructor now calls the
reap_threads function to properly tear everything down.

* Added data reader public API test for responses.  Improved the tests
to sweep multiple variables through the public API.

* Updated the synthetic data reader API tests to use catch2 generators.

* Fixed the internal synthetic data reader APIs to use the GENERATE macros.

* Removed temporary debugging code

* Fixed a bug where the matrix for the labels data field has to be
zeroed out prior to calling fetch_label because most implementations
of that will only set the single field corresponding to the
categorical value.

In the future, this requirement should be pushed into the data reader
specific implementation, since the need will vary for each data field.

* Fixed the prototext in the model catch tests

* Fixed the ATOM integration test to use the new input layer.

* Switched the buffered data coordinator and the data readers to use
string-based data_field_type data fields for managing buffers rather
than the enum class input_data_type.  Deprecated and removed the
input_data_type enum class.

Changed the buffered_data_coordinator so that storage is only
allocated for the data fields that a model uses.  Allocation is
triggered when the input layer registers its data field.

Added a function to the data reader class to get the linearized size
of any data field.

* Updated copyright date

* Apply suggestions from code review

Co-authored-by: Tom Benson <30674819+benson31@users.noreply.github.com>

* Removed unnecessary Identity layers.

* Removed unnecessary Identity layers around Inputs.

* Removed Identity layer wrapping Input layer in tests.

* Apply suggestions from code review

Co-authored-by: Tom Benson <30674819+benson31@users.noreply.github.com>

* Changed the io_data_buffer class so that it is only initialized if
the data_field key doesn't already exist.

* Fixed the input layer

* Updated the LTFB algorithms to add the call to setup the data_fields
in the data coordinator after a model is setup.

* Added a guard to throw an error if an input layer is setup without a proper data field.

* Fixed the LTFB callback to setup the data fields in the data
coordinator after the models are exchanged and re-setup.

* Fixed data_field name to be samples, not datum.

* Fixed the data_field name in the Input layers.

* Changed the behavior of the buffered data coordinator to call the
setup_data_fields function whenever a data field is registered by an
input layer.  Added a guard to the setup_data_fields to avoid resizing
and zeroing a buffer if it is already allocated.  Added a guard to
avoid reallocating the buffer for data fields that are already
registered.

Removed calls to setup_data_field from the trainer and LTFB calls.

* Fixed prototext

* Updated the Release Notes.

* Fixed the model and execution algorithm catch2 tests to construct a
trainer so that the model can register the input layer with a valid
data coordinator.

* Apply suggestions from code review

Co-authored-by: Tim Moon <moon13@llnl.gov>

* Cleaned up the naming scheme for the HDF5 data reader's data field
parameter.

* Added a guard to the buffered data coordinator to throw an exception
if a data field is requested that has not been registered.  Fixed how
data is distributed to the input layer's input buffer for DistConv.

* Restoring the behavior of the partial_mini_batch_size on the buffered data coordinator.

* Revert "Restoring the behavior of the partial_mini_batch_size on the buffered data coordinator."

This reverts commit 9bf63679e6d2931fcd605b7f2c6035eea2783d24.

* Restoring the behavior of the partial_mini_batch_size on the buffered data coordinator.

* Tidied and cleaned up new code changes with clang-format using the
following command.

git diff -U0 --no-color develop HEAD -- "*.cpp" "*.hpp" | /usr/tce/packages/clang/clang-11.0.1/share/clang/clang-format-diff.py -i -p1

* Update include/lbann/data_coordinator/buffered_data_coordinator.hpp

Co-authored-by: Tom Benson <30674819+benson31@users.noreply.github.com>

Co-authored-by: Tom Benson <30674819+benson31@users.noreply.github.com>
Co-authored-by: Tim Moon <moon13@llnl.gov>
---
 ReleaseNotes.txt                              |  13 +-
 applications/ATOM/eval_atom_wae.py            |   2 +-
 applications/ATOM/eval_atom_wae_dec.py        |   8 +-
 applications/ATOM/train_atom_char_rnn.py      |   2 +-
 applications/ATOM/train_atom_vae.py           |   2 +-
 applications/ATOM/train_atom_wae.py           |   2 +-
 applications/CANDLE/pilot1/train_combo.py     |   9 +-
 applications/graph/GNN/Dense_Graph_Trainer.py | 110 +++----
 applications/graph/GNN/NNConvModel.py         |  42 +--
 .../graph/GNN/Sparse_Graph_Trainer.py         | 134 ++++----
 .../graph/communityGAN/model/__init__.py      |   2 +-
 applications/graph/motif/model/__init__.py    |   2 +-
 applications/graph/node2vec/main.py           |   2 +-
 applications/graph/node2vec/randproj.py       |   2 +-
 applications/nlp/rnn/main.py                  |   2 +-
 .../nlp/transformer/subgraph/train.py         |   2 +-
 .../nlp/transformer/subgraph/train_infer.py   |   4 +-
 applications/nlp/transformer/train.py         |   2 +-
 applications/optimizers/kfac/kfac.py          |   5 +-
 .../physics/ICF/eval_macc_surrogate.py        |   2 +-
 applications/physics/ICF/pre_train_jag_wae.py |   2 +-
 applications/physics/ICF/train_jag_wae.py     |   2 +-
 .../physics/ICF/train_macc_surrogate.py       |   2 +-
 .../physics/cosmology/ExaGAN/train_exagan.py  |   2 +-
 .../physics/cosmology/cosmoflow/cosmoflow.py  |   8 +-
 applications/selfsupervised/classifier.py     |   7 +-
 .../selfsupervised/pretrain_siamese.py        |   2 +-
 applications/vision/alexnet.py                |   7 +-
 applications/vision/densenet.py               |  37 +--
 applications/vision/lenet.py                  |   7 +-
 applications/vision/resnet.py                 |   7 +-
 .../test_integration_alexnet.py               |   5 +-
 .../test_integration_atom_wae.py              |   2 +-
 .../test_integration_lenet.py                 |   7 +-
 .../test_integration_resnet50.py              |   5 +-
 bamboo/unit_tests/test_unit_algo_kfac.py      |   2 +-
 bamboo/unit_tests/test_unit_algo_ltfb.py      |   2 +-
 .../test_unit_algo_ltfb_trunc_selection.py    |  16 +-
 bamboo/unit_tests/test_unit_callback_ltfb.py  |   2 +-
 .../test_unit_callback_ltfb_data.py           |   3 +-
 .../test_unit_callback_set_weights_value.py   |   2 +-
 .../unit_tests/test_unit_checkpoint_lenet.py  |   9 +-
 .../unit_tests/test_unit_datareader_python.py |   2 +-
 .../test_unit_datastore_imagenet.py           |   2 +-
 bamboo/unit_tests/test_unit_layer_argmax.py   |   2 +-
 bamboo/unit_tests/test_unit_layer_argmin.py   |   2 +-
 .../test_unit_layer_batch_normalization.py    |   2 +-
 .../test_unit_layer_batched_matmul.py         |   2 +-
 .../test_unit_layer_batchwise_reduce_sum.py   |   2 +-
 ..._unit_layer_channelwise_fully_connected.py |   2 +-
 ...er_channelwise_fully_connected_distconv.py |   4 +-
 .../test_unit_layer_channelwise_gru_cell.py   |   2 +-
 .../test_unit_layer_channelwise_scale_bias.py |   2 +-
 .../test_unit_layer_channelwise_softmax.py    |   2 +-
 bamboo/unit_tests/test_unit_layer_clamp.py    |   2 +-
 .../unit_tests/test_unit_layer_concatenate.py |   2 +-
 .../unit_tests/test_unit_layer_convolution.py |   2 +-
 .../test_unit_layer_convolution_distconv.py   |   2 +-
 .../unit_tests/test_unit_layer_covariance.py  |   2 +-
 .../test_unit_layer_cross_entropy.py          |   2 +-
 bamboo/unit_tests/test_unit_layer_dft_abs.py  |   2 +-
 .../test_unit_layer_dist_embedding.py         |   2 +-
 bamboo/unit_tests/test_unit_layer_elu.py      |   2 +-
 .../unit_tests/test_unit_layer_embedding.py   |   2 +-
 ...nit_layer_entrywise_batch_normalization.py |   2 +-
 .../test_unit_layer_entrywise_scale_bias.py   |   2 +-
 bamboo/unit_tests/test_unit_layer_erf.py      |   2 +-
 bamboo/unit_tests/test_unit_layer_erfinv.py   |   2 +-
 .../test_unit_layer_fully_connected.py        |   2 +-
 bamboo/unit_tests/test_unit_layer_gather.py   |  10 +-
 bamboo/unit_tests/test_unit_layer_gru.py      |   2 +-
 bamboo/unit_tests/test_unit_layer_identity.py |   2 +-
 .../test_unit_layer_identity_distconv.py      |   2 +-
 .../test_unit_layer_instance_norm.py          |   2 +-
 bamboo/unit_tests/test_unit_layer_l1_norm.py  |   2 +-
 .../unit_tests/test_unit_layer_layer_norm.py  |   2 +-
 .../unit_tests/test_unit_layer_leaky_relu.py  |   2 +-
 .../test_unit_layer_leaky_relu_distconv.py    |   2 +-
 .../unit_tests/test_unit_layer_log_sigmoid.py |   2 +-
 .../unit_tests/test_unit_layer_log_softmax.py |   2 +-
 bamboo/unit_tests/test_unit_layer_matmul.py   |   2 +-
 .../test_unit_layer_mean_absolute_error.py    |   2 +-
 .../test_unit_layer_mean_squared_error.py     |   2 +-
 bamboo/unit_tests/test_unit_layer_one_hot.py  |   2 +-
 bamboo/unit_tests/test_unit_layer_pooling.py  |   2 +-
 .../test_unit_layer_pooling_distconv.py       |   2 +-
 .../unit_tests/test_unit_layer_reduction.py   |   2 +-
 bamboo/unit_tests/test_unit_layer_relu.py     |   2 +-
 .../test_unit_layer_relu_distconv.py          |   2 +-
 .../test_unit_layer_rowwise_weights_norms.py  |   2 +-
 bamboo/unit_tests/test_unit_layer_scatter.py  |  22 +-
 bamboo/unit_tests/test_unit_layer_selu.py     |   2 +-
 bamboo/unit_tests/test_unit_layer_sigmoid.py  |   2 +-
 ...unit_layer_sigmoid_binary_cross_entropy.py |   2 +-
 bamboo/unit_tests/test_unit_layer_slice.py    |   2 +-
 bamboo/unit_tests/test_unit_layer_softmax.py  |   2 +-
 bamboo/unit_tests/test_unit_layer_softplus.py |   2 +-
 bamboo/unit_tests/test_unit_layer_softsign.py |   2 +-
 .../test_unit_layer_squared_difference.py     |   2 +-
 .../unit_tests/test_unit_layer_tessellate.py  |   2 +-
 .../test_unit_layer_uniform_hash.py           |   2 +-
 bamboo/unit_tests/test_unit_layer_variance.py |   2 +-
 .../test_unit_load_weights_lenet.py           |   9 +-
 .../test_unit_reconstruction_loss.py          |   4 +-
 .../test_unit_subgraph_cross_grid_slice.py    |   2 +-
 .../test_unit_subgraph_cross_grid_sum.py      |   2 +-
 .../test_unit_subgraph_slice_concat.py        |   2 +-
 .../test_unit_subgraph_slice_sum.py           |   2 +-
 .../test_unit_subgraph_split_sum.py           |   2 +-
 .../buffered_data_coordinator.hpp             |  18 +-
 .../data_coordinator/data_coordinator.hpp     |  42 ++-
 .../data_coordinator_metadata.hpp             |   3 +-
 .../lbann/data_coordinator/io_data_buffer.hpp |  19 +-
 .../data_coordinator/io_data_buffer_impl.hpp  |  17 +
 include/lbann/data_readers/data_reader.hpp    | 149 +++++----
 .../lbann/data_readers/data_reader_HDF5.hpp   |   8 +-
 .../lbann/data_readers/data_reader_csv.hpp    |   6 +-
 .../data_readers/data_reader_jag_conduit.hpp  |  13 +-
 .../lbann/data_readers/data_reader_numpy.hpp  |   8 +-
 .../lbann/data_readers/data_reader_python.hpp |   4 +-
 .../data_reader_sample_list_impl.hpp          |   2 +-
 .../data_readers/data_reader_synthetic.hpp    |  10 +-
 .../lbann/data_readers/sample_list_impl.hpp   |   2 +-
 .../sample_list_open_files_impl.hpp           |   8 +-
 .../data_readers/utils/input_data_type.hpp    |  11 +-
 .../batch_functional_inference_algorithm.hpp  |   4 +-
 include/lbann/layers/io/input_layer.hpp       |  44 +--
 include/lbann/proto/proto_common.hpp          |   2 +-
 include/lbann/utils/argument_parser.hpp       |   5 +-
 .../lbann/utils/dnn_lib/cudnn/convolution.hpp |   2 +-
 .../utils/dnn_lib/miopen/convolution.hpp      |  12 +-
 include/lbann/utils/lbann_library.hpp         |   9 +-
 include/lbann/utils/protobuf_utils.hpp        |   4 +-
 include/lbann/utils/threads/thread_pool.hpp   |   5 +-
 .../data_readers/data_reader_jag.prototext    |   4 +-
 model_zoo/jag_utils/build_index.cpp           |  26 +-
 .../jag_utils/build_sample_id_mapping.cpp     |   2 +-
 .../jag_utils/check_for_duplicate_samples.cpp |  24 +-
 model_zoo/jag_utils/check_images.cpp          |  31 +-
 .../jag_utils/compute_hydra_normalization.cpp |  35 +-
 .../jag_utils/compute_min_max_images.cpp      |  31 +-
 .../compute_per_channel_image_avg_min_max.cpp |  31 +-
 model_zoo/jag_utils/convert.cpp               |   4 +-
 .../jag_utils/convert_npz_to_conduit.cpp      |  31 +-
 model_zoo/jag_utils/detect_corruption.cpp     |   2 +-
 .../jag_utils/extract_random_samples.cpp      |  34 +-
 .../jag_utils/generate_corrupt_samples.cpp    |  32 +-
 model_zoo/jag_utils/load_balance.cpp          |   7 +-
 model_zoo/jag_utils/load_bundle2raw.cpp       |   2 +-
 model_zoo/jag_utils/select_samples.cpp        |  14 +-
 model_zoo/jag_utils/test_conduit_hdf5.cpp     | 126 ++++----
 model_zoo/jag_utils/test_reading_speed.cpp    |  35 +-
 model_zoo/lbann.cpp                           |   5 +-
 model_zoo/lbann_aecycgan.cpp                  |  41 ++-
 model_zoo/lbann_cycgan.cpp                    |  73 +++--
 model_zoo/lbann_gan.cpp                       |  26 +-
 model_zoo/lbann_help.cpp                      |   2 +-
 model_zoo/lbann_inf.cpp                       |  10 +-
 model_zoo/tests/conduit_timing_test.cpp       |   2 +-
 .../tests/model_jag_single_layer_ae.prototext |   2 +-
 .../tests/model_lenet_mnist_ckpt.prototext    |  17 +-
 .../model_lenet_mnist_dist_ckpt.prototext     |  17 +-
 .../model_lenet_mnist_lbann2ckpt.prototext    |  17 +-
 src/callbacks/print_statistics.cpp            |   5 +-
 src/callbacks/timer.cpp                       |   5 +-
 .../buffered_data_coordinator.cpp             | 115 ++++---
 src/data_coordinator/data_coordinator.cpp     |   3 +-
 src/data_readers/CMakeLists.txt               |   3 -
 src/data_readers/data_reader.cpp              | 255 ++++++++-------
 src/data_readers/data_reader_HDF5.cpp         |  12 +-
 src/data_readers/data_reader_cifar10.cpp      |   4 +-
 src/data_readers/data_reader_csv.cpp          |   6 +-
 src/data_readers/data_reader_hdf5_legacy.cpp  |  24 +-
 src/data_readers/data_reader_image.cpp        |  26 +-
 src/data_readers/data_reader_imagenet.cpp     |   4 +-
 src/data_readers/data_reader_jag_conduit.cpp  |  39 +--
 .../data_reader_merge_samples.cpp             |   4 +-
 src/data_readers/data_reader_mesh.cpp         |   4 +-
 src/data_readers/data_reader_mnist.cpp        |   4 +-
 .../data_reader_npz_ras_lipid.cpp             |  18 +-
 src/data_readers/data_reader_numpy.cpp        |  16 +-
 src/data_readers/data_reader_numpy_npz.cpp    |  23 +-
 .../data_reader_numpy_npz_conduit.cpp         |  23 +-
 src/data_readers/data_reader_python.cpp       |  15 +-
 src/data_readers/data_reader_smiles.cpp       |   4 +-
 src/data_readers/data_reader_synthetic.cpp    |  31 +-
 src/data_readers/unit_test/CMakeLists.txt     |   2 +
 .../data_reader_smiles_fetch_datum_test.cpp   |  14 +-
 .../unit_test/data_reader_synthetic_test.cpp  | 194 +++++++++++
 .../data_reader_synthetic_test_public_api.cpp | 189 +++++++++++
 src/data_readers/utils/CMakeLists.txt         |   7 -
 src/data_readers/utils/input_data_type.cpp    |  43 ---
 src/data_store/data_store_conduit.cpp         |  12 +-
 .../ltfb/random_pairwise_exchange.cpp         |  17 +-
 .../ltfb/truncation_selection_exchange.cpp    |   4 +-
 .../unit_test/inference_algorithm_test.cpp    |   7 +-
 .../io/cereal_registration/input_layer.cpp    |   4 +-
 src/layers/io/input_layer.cpp                 |  47 ++-
 src/layers/learning/base_convolution.cpp      |   2 +-
 src/models/unit_test/lenet.prototext.inc      |  28 +-
 src/models/unit_test/modify_test.cpp          |   3 +
 src/proto/factories/layer_factory.cpp         |  17 +-
 src/proto/layers.proto                        |   4 +-
 src/proto/proto_common.cpp                    |  47 +--
 src/trainers/trainer.cpp                      |   7 +-
 src/utils/lbann_library.cpp                   |  23 +-
 src/utils/options.cpp                         | 306 +++++++-----------
 src/utils/protobuf_utils.cpp                  |  36 ++-
 src/utils/stack_profiler.cpp                  |   3 +-
 tests/test_shuffled_indices.cpp               |   6 +-
 .../test_sigint_tracing.cpp                   |   6 +-
 .../test_sigsev_tracing.cpp                   |   6 +-
 unit_test/MPICatchMain.cpp                    |   2 +-
 unit_test/SequentialCatchMain.cpp             |   4 +-
 214 files changed, 1933 insertions(+), 1500 deletions(-)
 create mode 100644 src/data_readers/unit_test/data_reader_synthetic_test.cpp
 create mode 100644 src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
 delete mode 100644 src/data_readers/utils/CMakeLists.txt
 delete mode 100644 src/data_readers/utils/input_data_type.cpp

diff --git a/ReleaseNotes.txt b/ReleaseNotes.txt
index 1bca7fa3945..42445ec0224 100644
--- a/ReleaseNotes.txt
+++ b/ReleaseNotes.txt
@@ -39,9 +39,16 @@ I/O & data readers:
    SMILES data reader to minimize file access
  - Sample lists with integral indices can use range format (start ... end)
  - Added a new extensible HDF5 data reader that uses a data set schema
- and experiment schema files to define how the data is represented.
- This allows the user to change the representation of data without
- changing the data reader.
+   and experiment schema files to define how the data is represented.
+   This allows the user to change the representation of data without
+   changing the data reader.
+ - Changed the input layer to take a data field and only produce a
+   single output.  Currently valid data fields are samples, labels,
+   and responses.
+ - Updated the data coordinator and data readers to
+   take dynamic data fields rather than fixed fields.  Input buffers
+   are no longer allocated for fields that are not used in active
+   models.
 
 Build system:
 
diff --git a/applications/ATOM/eval_atom_wae.py b/applications/ATOM/eval_atom_wae.py
index fe17411a197..2de16855e85 100644
--- a/applications/ATOM/eval_atom_wae.py
+++ b/applications/ATOM/eval_atom_wae.py
@@ -106,7 +106,7 @@ def construct_model(run_args):
     print("sequence length is {}".format(sequence_length))
     data_layout = "data_parallel"
     # Layer graph
-    input_ = lbann.Identity(lbann.Input(name='inp',target_mode="N/A"), name='inp1')
+    input_ = lbann.Identity(lbann.Input(name='inp',data_field='samples'), name='inp1')
     wae_loss= []
     input_feature_dims = sequence_length
 
diff --git a/applications/ATOM/eval_atom_wae_dec.py b/applications/ATOM/eval_atom_wae_dec.py
index 0f8844b510a..aacf3234841 100644
--- a/applications/ATOM/eval_atom_wae_dec.py
+++ b/applications/ATOM/eval_atom_wae_dec.py
@@ -109,13 +109,13 @@ def construct_model(run_args):
     print("sequence length is {}, which is training sequence len + bos + eos".format(sequence_length))
     data_layout = "data_parallel"
     # Layer graph
-    input_ = lbann.Input(target_mode='N/A',name='inp_data')
+    input_ = lbann.Input(data_field='samples',name='inp_data')
     #Note input assumes to come from encoder script concatenation of input smiles + z
-    inp_slice = lbann.Slice(input_, axis=0, 
+    inp_slice = lbann.Slice(input_, axis=0,
                              slice_points=str_list([0, sequence_length, sequence_length+run_args.z_dim]),
                              name='inp_slice')
     inp_smile = lbann.Identity(inp_slice,name='inp_smile')
-    z  = lbann.Identity(inp_slice, name='z') 
+    z  = lbann.Identity(inp_slice, name='z')
     wae_loss= []
     input_feature_dims = sequence_length
 
@@ -202,7 +202,7 @@ def construct_data_reader(run_args):
     os.environ["DATA_PATH"] = run_args.data_path
     seq_len = run_args.sequence_length+run_args.z_dim
     print("SEQ LEN for env ", seq_len)
-    os.environ["MAX_SEQ_LEN"] = str(seq_len) 
+    os.environ["MAX_SEQ_LEN"] = str(seq_len)
     print("MODULE file ", module_file)
 
     module_name = os.path.splitext(os.path.basename(module_file))[0]
diff --git a/applications/ATOM/train_atom_char_rnn.py b/applications/ATOM/train_atom_char_rnn.py
index 6bb5f90b237..d923eaf15fc 100644
--- a/applications/ATOM/train_atom_char_rnn.py
+++ b/applications/ATOM/train_atom_char_rnn.py
@@ -111,7 +111,7 @@ def construct_model(run_args):
     data_layout = "data_parallel"
 
     # Layer graph
-    _input = lbann.Input(name="inp_tensor", target_mode="N/A")
+    _input = lbann.Input(name="inp_tensor", data_field='samples')
     print(sequence_length)
     x_slice = lbann.Slice(
         _input,
diff --git a/applications/ATOM/train_atom_vae.py b/applications/ATOM/train_atom_vae.py
index 3df874b7180..904be4187d8 100644
--- a/applications/ATOM/train_atom_vae.py
+++ b/applications/ATOM/train_atom_vae.py
@@ -104,7 +104,7 @@ def construct_model(run_args):
     print("sequence length is {}".format(sequence_length))
     data_layout = "data_parallel"
     # Layer graph
-    input_ = lbann.Identity(lbann.Input(name='inp',target_mode="N/A"), name='inp1')
+    input_ = lbann.Identity(lbann.Input(name='inp',data_field='samples'), name='inp1')
     vae_loss= []
     input_feature_dims = sequence_length
 
diff --git a/applications/ATOM/train_atom_wae.py b/applications/ATOM/train_atom_wae.py
index 4c7a841f826..3cadf7a1f59 100644
--- a/applications/ATOM/train_atom_wae.py
+++ b/applications/ATOM/train_atom_wae.py
@@ -107,7 +107,7 @@ def construct_model(run_args):
     print("sequence length is {}".format(sequence_length))
     data_layout = "data_parallel"
     # Layer graph
-    input_ = lbann.Identity(lbann.Input(name='inp',target_mode="N/A"), name='inp1')
+    input_ = lbann.Identity(lbann.Input(name='inp',data_field='samples'), name='inp1')
     input_feature_dims = sequence_length
 
     embedding_size = run_args.embedding_dim
diff --git a/applications/CANDLE/pilot1/train_combo.py b/applications/CANDLE/pilot1/train_combo.py
index 77357a126c2..95cc36630f3 100644
--- a/applications/CANDLE/pilot1/train_combo.py
+++ b/applications/CANDLE/pilot1/train_combo.py
@@ -24,15 +24,14 @@ def construct_model():
     import lbann
 
     # Layer graph
-    input_ = lbann.Input(target_mode='regression')
-    data = lbann.Identity(input_)
-    responses = lbann.Identity(input_)
+    data = lbann.Input(data_field='samples')
+    responses = lbann.Input(data_field='responses')
 
     pred = combo.Combo()(data)
     mse = lbann.MeanSquaredError([responses, pred])
 
     SS_res = lbann.Reduction(lbann.Square(lbann.Subtract(responses, pred)), mode='sum')
- 
+
     #SS_tot = var(x) = mean((x-mean(x))^2)
     mini_batch_size = lbann.MiniBatchSize()
     mean = lbann.Divide(lbann.BatchwiseReduceSum(responses), mini_batch_size)
@@ -48,7 +47,7 @@ def construct_model():
 
     # Construct model
     num_epochs = 100
-    layers = list(lbann.traverse_layer_graph(input_))
+    layers = list(lbann.traverse_layer_graph([data, responses]))
     return lbann.Model(num_epochs,
                        layers=layers,
                        metrics=metrics,
diff --git a/applications/graph/GNN/Dense_Graph_Trainer.py b/applications/graph/GNN/Dense_Graph_Trainer.py
index cb22e998e0c..d9ba3931f9a 100644
--- a/applications/graph/GNN/Dense_Graph_Trainer.py
+++ b/applications/graph/GNN/Dense_Graph_Trainer.py
@@ -8,28 +8,28 @@ def DGCN_layer(feature_matrix,adj_matrix, node_features):
     Args:
         feature_matrix (Layer): Node feature layer. Should have the shape:
                                 (num_nodes, node_features)
-        adj_matrix (Layer): Adjancency matrix layer. Should have the shape: 
+        adj_matrix (Layer): Adjancency matrix layer. Should have the shape:
                             (num_nodes, num_nodes)
         node_features (int): The number of features per node
-    Returns: 
+    Returns:
         (Layer): Returns the new embedding of the node features
     """
     out_channel_1 = 1024
     out_channel_2 = 512
     out_channel_3 = 256
-    
+
     gcn1 = DenseGCNConv(input_channels = node_features, output_channels = out_channel_1)
     gcn2 = DenseGCNConv(input_channels = out_channel_1, output_channels = out_channel_2)
     gcn3 = DenseGCNConv(input_channels = out_channel_2, output_channels = out_channel_3)
-    
+
     out_channel = out_channel_3
-    
+
     x = gcn1(feature_matrix, adj_matrix )
-    x = lbann.Relu(x,name="DGCN1_activation") 
+    x = lbann.Relu(x,name="DGCN1_activation")
 
     x = gcn2(x, adj_matrix)
     x = lbann.Relu(x, name="DGCN2_activation")
-    
+
     x = gcn3 (x, adj_matrix)
     x = lbann.Relu(x, name="DGCN3_activation")
     return x
@@ -40,35 +40,35 @@ def DGraph_Layer(feature_matrix,adj_matrix, node_features):
     Args:
         feature_matrix (Layer): Node feature layer. Should have the shape:
                                 (num_nodes, node_features)
-        adj_matrix (Layer): Adjancency matrix layer. Should have the shape: 
+        adj_matrix (Layer): Adjancency matrix layer. Should have the shape:
                             (num_nodes, num_nodes)
         node_features (int): The number of features per node
-    Returns: 
+    Returns:
         (Layer): Returns the new embedding of the node features
     """
     out_channel_1 = 1024
     out_channel_2 = 512
     out_channel_3 = 256
-    
+
     gcn1 = DenseGraphConv(input_channels = node_features, output_channels = out_channel_1)
     gcn2 = DenseGraphConv(input_channels = out_channel_1, output_channels = out_channel_2)
     gcn3 = DenseGraphConv(input_channels = out_channel_2, output_channels = out_channel_3)
-    
+
     out_channel = out_channel_3
-    
+
     x = gcn1(feature_matrix, adj_matrix )
-    x = lbann.Relu(x,name="DGraph1_activation") 
+    x = lbann.Relu(x,name="DGraph1_activation")
 
     x = gcn2(x, adj_matrix)
     x = lbann.Relu(x, name="DGraph2_activation")
-    
+
     x = gcn3 (x, adj_matrix)
     x = lbann.Relu(x, name="DGraph3_activation")
     return x
 
 
-def make_model(num_vertices = None, 
-               node_features = None, 
+def make_model(num_vertices = None,
+               node_features = None,
                num_classes = None,
                kernel_type = 'GCN',
                callbacks = None,
@@ -76,87 +76,87 @@ def make_model(num_vertices = None,
     '''Construct a model DAG using one of the Graph Kernels
 
     Args:
-        num_vertices (int): Number of vertices of each graph (default: None) 
+        num_vertices (int): Number of vertices of each graph (default: None)
         node_features (int): Number of features per noded (default: None)
         num_classes (int): Number of classes as targets (default: None)
-        kernel_type (str): Graph Kernel to use in model. Expected one of 
+        kernel_type (str): Graph Kernel to use in model. Expected one of
                             GCN, or Graph (deafult: GCN)
-        callbacks (list): Callbacks for the model. If set to None the model description, 
-                          GPU usage, training_output, and timer is reported. 
-                          (default: None)                    
+        callbacks (list): Callbacks for the model. If set to None the model description,
+                          GPU usage, training_output, and timer is reported.
+                          (default: None)
         num_epochs (int): Number of epochs to run (default: 1)
     Returns:
         (lbann Model Object: A model object with the supplied callbacks, dataset
-                               presets, and graph kernels. 
-    '''   
-    
+                               presets, and graph kernels.
+    '''
+
     num_vertices = 100
     num_classes = 2
     node_features = 3
 
     assert num_vertices is not None
-    assert num_classes is not None 
-    assert node_features is not None 
-    
+    assert num_classes is not None
+    assert node_features is not None
+
 
     #----------------------------------
-    # Reshape and Slice Input Tensor 
+    # Reshape and Slice Input Tensor
     #----------------------------------
 
-    input_ = lbann.Input(target_mode='N/A')
+    input_ = lbann.Input(data_field='samples')
 
     # Input dimensions should be (num_vertices * node_features + num_vertices^2 + num_classes )
-    # input should have atleast two children since the target is classification 
-    
+    # input should have atleast two children since the target is classification
+
     sample_dims = num_vertices*node_features + (num_vertices ** 2) + num_classes
     graph_dims = num_vertices*node_features + (num_vertices ** 2)
-    feature_matrix_size = num_vertices * node_features 
-   
-    graph_input = lbann.Slice(input_, axis = 0 , 
+    feature_matrix_size = num_vertices * node_features
+
+    graph_input = lbann.Slice(input_, axis = 0 ,
                               slice_points = str_list([0,feature_matrix_size,graph_dims, sample_dims]),
-                              name = "Graph_Input") 
+                              name = "Graph_Input")
 
-    
-    feature_matrix = lbann.Reshape(graph_input, 
-                                   dims = str_list([num_vertices, node_features]), 
+
+    feature_matrix = lbann.Reshape(graph_input,
+                                   dims = str_list([num_vertices, node_features]),
                                    name="Node_features")
-    
+
     adj_matrix = lbann.Reshape(graph_input,
-                               dims = str_list([num_vertices,num_vertices]), 
-                               name="Adj_Mat") 
+                               dims = str_list([num_vertices,num_vertices]),
+                               name="Adj_Mat")
 
     target = lbann.Identity(graph_input, name="Target")
-    target = lbann.Reshape(target, dims=str(num_classes))    
-   
+    target = lbann.Reshape(target, dims=str(num_classes))
+
     #----------------------------------
     # Perform Graph Convolution
     #----------------------------------
-    
+
     if kernel_type == 'GCN':
         x = DGCN_layer(feature_matrix, adj_matrix, node_features)
     elif kernel_type == 'Graph':
         x = DGraph_Layer(feature_matrix, adj_matrix, node_features)
     else:
         ValueError('Invalid Graph kernel specifier "{}" recieved. Expected one of:\
-                    GCN or Graph'.format(kernel_type)) 
-    out_channel = 256    
+                    GCN or Graph'.format(kernel_type))
+    out_channel = 256
     #----------------------------------
     # Apply Reduction on Node Features
     #----------------------------------
 
     average_vector = lbann.Constant(value = 1/num_vertices, num_neurons = str_list([1,num_vertices]), name="Average_Vector")
-    x = lbann.MatMul(average_vector,x, name="Node_Feature_Reduction") # X is now a vector with output_channel dimensions 
-    
+    x = lbann.MatMul(average_vector,x, name="Node_Feature_Reduction") # X is now a vector with output_channel dimensions
+
     x = lbann.Reshape(x, dims= str_list([out_channel]), name="Squeeze")
     x = lbann.FullyConnected(x, num_neurons=256, name="hidden_layer_1")
     x = lbann.Relu(x, name="hidden_layer_1_activation")
     x = lbann.FullyConnected(x, num_neurons=num_classes, name="Output_Fully_Connected")
-    
+
     #----------------------------------
     # Loss Function and Accuracy s
     #----------------------------------
-    
-    
+
+
     probs = lbann.Softmax(x, name="Softmax")
     loss = lbann.CrossEntropy(probs, target, name="Cross_Entropy_Loss")
     accuracy = lbann.CategoricalAccuracy(probs, target, name="Accuracy")
@@ -171,13 +171,13 @@ def make_model(num_vertices = None,
         callbacks = [print_model, training_output, gpu_usage, timer]
     else:
         if isinstance (callbacks, list):
-            callbacks = callbacks   
+            callbacks = callbacks
     metrics = [lbann.Metric(accuracy, name='accuracy', unit="%")]
 
-    model = lbann.Model(num_epochs, 
+    model = lbann.Model(num_epochs,
                        layers = layers,
                        objective_function = loss,
-                       metrics = metrics, 
+                       metrics = metrics,
                        callbacks = callbacks
                        )
     return model
@@ -185,4 +185,4 @@ def make_model(num_vertices = None,
 
 if __name__ == '__main__':
     model = make_model(dataset="MNIST")
-    model = make_model(dataset="MNIST", kernel_type = 'Graph') 
+    model = make_model(dataset="MNIST", kernel_type = 'Graph')
diff --git a/applications/graph/GNN/NNConvModel.py b/applications/graph/GNN/NNConvModel.py
index 4efe5c7aaf8..8549d578217 100755
--- a/applications/graph/GNN/NNConvModel.py
+++ b/applications/graph/GNN/NNConvModel.py
@@ -98,18 +98,18 @@ def graph_data_splitter(_input,
 	 					NUM_EDGE_FEATURES,
 	 					EMBEDDING_DIM,
 	 					EDGE_EMBEDDING_DIM):
-		"""Helper function to split the input data into  
+		"""Helper function to split the input data into
 
-			Args: 
+			Args:
 				NUM_NODES (int): The number of nodes in the largest graph in the dataset (51 for LSC-PPQM4M)
 		      	NUM_EDGES (int): The number of edges in the largest graph in the dataset (118 for LSC-PPQM4M)
 		      	NUM_NODE_FEATURES (int): The dimensionality of the input node features vector (9 for LSC-PPQM4M)
 		      	NUM_EDGE_FEATURES (int): The dimensionality of the input edge feature vectors (3 for LSC-PPQM4M)
 		      	EMBEDDING_DIM (int): The embedding dimensionality of the node feature vector
 
-		      	EDGE_EMBEDDING_DIM (int): The embedding dimensionality of the edge feature vector 
+		      	EDGE_EMBEDDING_DIM (int): The embedding dimensionality of the edge feature vector
 			Returns:
-				(Layer, Layer, Layer, Layer, Layer): Returns 5 Layers. The embedded node feature matrix, the 
+				(Layer, Layer, Layer, Layer, Layer): Returns 5 Layers. The embedded node feature matrix, the
 													 neighbord nodes feature tensor, the embedded edge feature matrix,
 													 the source node index vector, and the label
 		"""
@@ -175,7 +175,7 @@ def graph_data_splitter(_input,
 
 def create_parallel_strategy(num_channel_groups):
     """Helper function to create channelwise fully connected layer distconv
-       parallel strategy 
+       parallel strategy
     """
     if (num_channel_groups > 0):
       return {"channel_groups": num_channel_groups,
@@ -199,12 +199,12 @@ def NNConvLayer(node_features,
 	Args: node_features (Layer): Layer containing the node featue matrix of the graph (NUM_NODES, in_channel)
 	      neighbor_features (Layer): Layer containing the neighbor feature tensor of the graph of shape (NUM_EDGES, 1, in_channel)
 	      edge_features (Layer): Layer containing the edge feature matrix of the graph of shape (NUM_EDGES, EMBEDDED_EDGE_FEATURES)
-	      edge_index (Layer): Layer contain the source edge index vector of the graph of shape (NUM_EDGES) 
+	      edge_index (Layer): Layer contain the source edge index vector of the graph of shape (NUM_EDGES)
 	      in_channel (int): The embedding dimensionality of the node feature vector
 	      out_channel (int): The dimensionality of the node feature vectors after graph convolutions
 	      NUM_NODES (int): The number of nodes in the largest graph in the dataset (51 for LSC-PPQM4M)
 	      NUM_EDGES (int): The number of edges in the largest graph in the dataset (118 for LSC-PPQM4M)
-	      NUM_GROUPS (int): The number of channel groups for distconv channelwise  fully connected layer (default : 0) 
+	      NUM_GROUPS (int): The number of channel groups for distconv channelwise  fully connected layer (default : 0)
 	      """
 	FC = ChannelwiseFullyConnectedModule
 
@@ -225,19 +225,19 @@ def NNConvLayer(node_features,
 		FC1 = [1, FC1]
 		FC2 = [1, FC2]
 		FC3 = [1, FC3]
-	
+
 	sequential_nn = \
-	[FC(FC1, weights=[nn_sq_1_weight], 
-			name="NN_SQ_1", bias=True, 
-			activation=lbann.Relu, 
+	[FC(FC1, weights=[nn_sq_1_weight],
+			name="NN_SQ_1", bias=True,
+			activation=lbann.Relu,
 			parallel_strategy=create_parallel_strategy(NUM_GROUPS)),
-	 FC(FC2, weights=[nn_sq_2_weight], 
-	 		name="NN_SQ_2", bias=True, 
-	 		activation=lbann.Relu, 
+	 FC(FC2, weights=[nn_sq_2_weight],
+	 		name="NN_SQ_2", bias=True,
+	 		activation=lbann.Relu,
 	 		parallel_strategy=create_parallel_strategy(NUM_GROUPS)),
-	 FC(FC3, weights=[nn_sq_3_weight], 
-	 		name="NN_SQ_3", bias=True, 
-	 		activation=lbann.Relu, 
+	 FC(FC3, weights=[nn_sq_3_weight],
+	 		name="NN_SQ_3", bias=True,
+	 		activation=lbann.Relu,
 	 		parallel_strategy=create_parallel_strategy(NUM_GROUPS))]
 
 	nn_conv = NNConv(sequential_nn,
@@ -264,16 +264,16 @@ def make_model(NUM_NODES,
 						   NUM_GROUPS=0):
 	""" Creates an LBANN model for the OGB-LSC PPQM4M Dataset
 
-	Args: 
+	Args:
 		NUM_NODES (int): The number of nodes in the largest graph in the dataset (51 for LSC-PPQM4M)
   	NUM_EDGES (int): The number of edges in the largest graph in the dataset (118 for LSC-PPQM4M)
   	NUM_NODES_FEATURES (int): The dimensionality of the input node features vector (9 for LSC-PPQM4M)
   	NUM_EDGE_FEATURES (int): The dimensionality of the input edge feature vectors (3 for LSC-PPQM4M)
   	EMBEDDING_DIM (int): The embedding dimensionality of the node feature vector
   	EDGE_EMBEDDING_DIM (int): The embedding dimensionality of the edge feature vector
-  	NUM_OUT_FEATURES (int): The dimensionality of the node feature vectors after graph convolutions 
+  	NUM_OUT_FEATURES (int): The dimensionality of the node feature vectors after graph convolutions
   	NUM_EPOCHS (int): The number of epochs to train the network
-  	NUM_GROUPS (int): The number of channel groups for distconv channelwise  fully connected layer (default : 0)  
+  	NUM_GROUPS (int): The number of channel groups for distconv channelwise  fully connected layer (default : 0)
 	Returns:
 		(Model): lbann model object
 		"""
@@ -281,7 +281,7 @@ def make_model(NUM_NODES,
 	out_channel = NUM_OUT_FEATURES
 	output_dimension = 1
 
-	_input = lbann.Input(target_mode='N/A')
+	_input = lbann.Input(data_field='samples')
 	node_feature_mat, neighbor_feature_mat, edge_feature_mat, edge_indices, target = \
 		graph_data_splitter(_input,
 												NUM_NODES,
diff --git a/applications/graph/GNN/Sparse_Graph_Trainer.py b/applications/graph/GNN/Sparse_Graph_Trainer.py
index 3130cd33720..0ecb028c940 100644
--- a/applications/graph/GNN/Sparse_Graph_Trainer.py
+++ b/applications/graph/GNN/Sparse_Graph_Trainer.py
@@ -1,35 +1,35 @@
-import lbann 
-from lbann.util import str_list 
+import lbann
+from lbann.util import str_list
 from lbann.modules.graph import GINConv, GCNConv, GraphConv, GatedGraphConv
 from itertools import accumulate
 
 
 def Graph_Data_Parser(_lbann_input_,
-                      num_nodes, 
-                      node_feature_size, 
+                      num_nodes,
+                      node_feature_size,
                       max_edges,
                       num_classes = 1):
     """ A parser for graph structured data with node
         features, source and target node indices (COO)
-        format, and a target 
+        format, and a target
 
     Args:
-        _lbann_input_ (Layer): The input layer of the LBANN model 
+        _lbann_input_ (Layer): The input layer of the LBANN model
         num_nodes (int): The maximum number of nodes in the dataset
         node_features_size (int): The dimensionality of the node features matrix
         max_edges (int): The maximum number of edges in the dataset
-        num_classes (int): The number of classes in the target or 1 for 
+        num_classes (int): The number of classes in the target or 1 for
                            regression (default : 1)
     Returns:
-        (dictionary) Returns a dictionary with the keys: node_features, source_indices, 
-                     target_indices, and targets  
+        (dictionary) Returns a dictionary with the keys: node_features, source_indices,
+                     target_indices, and targets
     """
     slice_points = [0, num_nodes*node_feature_size, max_edges, max_edges, num_classes]
     shifted_slice_points = list(accumulate(slice_points))
     sliced_input = lbann.Slice(_lbann_input_,
                                slice_points=str_list(shifted_slice_points),
                                name="Sliced_Graph_Input")
-    node_features = lbann.Reshape(lbann.Identity(sliced_input), 
+    node_features = lbann.Reshape(lbann.Identity(sliced_input),
                                   dims=str_list([num_nodes, node_feature_size]),
                                   name="Node_Feature_Matrix")
     source_indices = lbann.Identity(sliced_input)
@@ -51,21 +51,21 @@ def GINConvLayer(node_features,
                  num_edges,
                  input_channels,
                  output_channels):
-    """An example GIN kernel with 4 layer deep sequential nn.  
+    """An example GIN kernel with 4 layer deep sequential nn.
     Args:
-        node_feature (Layer): Node feature matrix with the shape of (num_nodes,input_channels) 
+        node_feature (Layer): Node feature matrix with the shape of (num_nodes,input_channels)
         source_indices (Layer): Source node indices of the edges with shape (num_nodes)
         target_indices (Layer): Target node indices of the edges with shape (num_nodes)
         num_nodes (int): Number of vertices in the graph
         input_channels (int): The size of the input node features
-        output_channels (int): The number of output channels of the node features 
-    Returns: 
-        (GraphVertexData): Returns the new embedding of the node features 
+        output_channels (int): The number of output channels of the node features
+    Returns:
+        (GraphVertexData): Returns the new embedding of the node features
     """
     FC = lbann.modules.ChannelwiseFullyConnectedModule
     sequential_nn = \
-                    [FC(128), 
-                     lbann.Relu, 
+                    [FC(128),
+                     lbann.Relu,
                      FC(64),
                      lbann.Relu,
                      FC(32),
@@ -74,7 +74,7 @@ def GINConvLayer(node_features,
                      lbann.Relu]
 
     gin = GINConv(sequential_nn,
-                  input_channels = input_channels, 
+                  input_channels = input_channels,
                   output_channels = output_channels,
                   num_nodes = num_nodes,
                   num_edges = num_edges)
@@ -90,13 +90,13 @@ def GCNConvLayer(node_features,
                  output_channels):
     """An example 2-layer GCN kernel.
     Args:
-        node_feature (Layer): Node feature matrix with the shape of (num_nodes,input_channels) 
+        node_feature (Layer): Node feature matrix with the shape of (num_nodes,input_channels)
         source_indices (Layer): Source node indices of the edges with shape (num_nodes)
         target_indices (Layer): Target node indices of the edges with shape (num_nodes)
         num_nodes (int): Number of vertices in the graph
         input_channels (int): The size of the input node features
-        output_channels (int): The number of output channels of the node features 
-    Returns: 
+        output_channels (int): The number of output channels of the node features
+    Returns:
         (Layer) : The resultant node features after message passing kernel ops
     """
     input_channels_1 = input_channels
@@ -111,13 +111,13 @@ def GCNConvLayer(node_features,
                     name = 'GCN_1')
     gcn_2 = GCNConv(input_channels_2,out_channels_2,
                     num_nodes,
-                    bias = True, 
+                    bias = True,
                     activation = lbann.Relu,
                     name = 'GCN_2')
     X = gcn_1(node_features,source_indices, target_indices)
     return  gcn_2(X,source_indices, target_indices)
 
-   
+
 def GraphConvLayer(node_features,
                    source_indices,
                    target_indices,
@@ -127,29 +127,29 @@ def GraphConvLayer(node_features,
                    output_channels):
     """An example 2-layer Graph kernel.
     Args:
-        node_feature (Layer): Node feature matrix with the shape of (num_nodes,input_channels) 
+        node_feature (Layer): Node feature matrix with the shape of (num_nodes,input_channels)
         source_indices (Layer): Source node indices of the edges with shape (num_nodes)
         target_indices (Layer): Target node indices of the edges with shape (num_nodes)
         num_nodes (int): Number of vertices in the graph
         input_channels (int): The size of the input node features
         output_channels (int): The number of output channels of the node features
-    Returns: 
+    Returns:
         (Layer) : The resultant node features after message passing kernel ops
     """
     input_channels_1 = input_channels
-    out_channels_1 = 8 
+    out_channels_1 = 8
     input_channels_2 = out_channels_1
     out_channels_2 = output_channels
-    
+
     graph_1 = GraphConv(input_channels_1, out_channels_1,
                         num_nodes,
                         bias = True,
                         activation = lbann.Relu,
                         name = 'Graph_kernel_1')
-    graph_2 = GraphConv(input_channels_2, out_channels_2, 
+    graph_2 = GraphConv(input_channels_2, out_channels_2,
                         num_nodes,
                         bias = True,
-                        activation = lbann.Relu, 
+                        activation = lbann.Relu,
                         name = 'Graph_Kernel_2')
 
     X = graph_1(node_features,source_indices, target_indices)
@@ -164,27 +164,27 @@ def GATConvLayer(node_features,
                  output_channels):
     """An example single layer GatedGraph kernel.
     Args:
-        node_feature (Layer): Node feature matrix with the shape of (num_nodes,input_channels) 
+        node_feature (Layer): Node feature matrix with the shape of (num_nodes,input_channels)
         source_indices (Layer): Source node indices of the edges with shape (num_nodes)
         target_indices (Layer): Target node indices of the edges with shape (num_nodes)
         num_nodes (int): Number of vertices in the graph
         input_channels (int): The size of the input node features
         output_channels (int): The number of output channels of the node features
-    Returns: 
+    Returns:
         (Layer) : The resultant node features after message passing kernel ops
-    """    
+    """
     num_layers = 3
     name = 'GatedGraph'
-    data_layout = 'data_parallel' 
+    data_layout = 'data_parallel'
 
     graph_kernel = GatedGraphConv(input_channels, output_channels,
-                                  num_nodes, 
+                                  num_nodes,
                                   num_layers = num_layers,
                                   name = name)
     return graph_kernel(node_features,source_indices, target_indices)
 
-def make_model(num_vertices = None, 
-               node_features = None, 
+def make_model(num_vertices = None,
+               node_features = None,
                num_classes = None,
                kernel_type = 'GCN',
                callbacks = None,
@@ -192,41 +192,41 @@ def make_model(num_vertices = None,
     '''Construct a model DAG using one of the Graph Kernels
 
     Args:
-        num_vertices (int): Number of vertices of each graph (default: None) 
+        num_vertices (int): Number of vertices of each graph (default: None)
         node_features (int): Number of features per noded (default: None)
         num_classes (int): Number of classes as targets (default: None)
-        
-        kernel_type (str): Graph Kernel to use in model. Expected one of 
+
+        kernel_type (str): Graph Kernel to use in model. Expected one of
                             GCN, GIN, Graph, or GatedGraph (deafult: GCN)
-        callbacks (list): Callbacks for the model. If set to None the model description, 
-                          GPU usage, training_output, and timer is reported. 
-                          (default: None)                    
+        callbacks (list): Callbacks for the model. If set to None the model description,
+                          GPU usage, training_output, and timer is reported.
+                          (default: None)
         num_epochs (int): Number of epochs to run (default: 1)
     Returns:
         (lbann.Model) : A model object with the supplied callbacks, dataset
-                               presets, and graph kernels. 
+                               presets, and graph kernels.
     '''
 
     num_vertices = 100
     num_classes = 2
     node_feature_size = 3
-    max_edges = 415  
+    max_edges = 415
 
     #----------------------------------
-    # Reshape and Slice Input Tensor 
+    # Reshape and Slice Input Tensor
     #----------------------------------
 
-    input_ = lbann.Input(target_mode="N/A")
+    input_ = lbann.Input(data_field='samples')
+
+    # Input dimensions should be (num_vertices * node_features + num_vertices^2 + num_classes )
 
-    # Input dimensions should be (num_vertices * node_features + num_vertices^2 + num_classes )     
-    
     data = Graph_Data_Parser(input_,
                              num_vertices,
                              node_feature_size,
                              max_edges,
                              num_classes)
-    
-    feature_matrix = data['node_features'] 
+
+    feature_matrix = data['node_features']
     source_indices = data['source_indices']
     target_indices = data['target_indices']
     target = data['target']
@@ -235,25 +235,25 @@ def make_model(num_vertices = None,
     # Select Graph Convolution
     #----------------------------------
 
-   
+
 
     output_channels = 16
     graph_kernel_op = None
     if kernel_type == 'GIN':
-        graph_kernel_op = GINConvLayer 
+        graph_kernel_op = GINConvLayer
     elif kernel_type == 'GCN':
         graph_kernel_op = GCNConvLayer
     elif kernel_type == 'Graph':
-        graph_kernel_op = GraphConvLayer 
+        graph_kernel_op = GraphConvLayer
     elif kernel_type == 'GatedGraph':
-        graph_kernel_op = GATConvLayer 
+        graph_kernel_op = GATConvLayer
     else:
         raise ValueError('Invalid Graph kernel specifier "{}" recieved. Expected one of:\
                     GIN,GCN,Graph or GatedGraph'.format(kernel_type))
     #----------------------------------
     # Perform Graph Convolution
     #----------------------------------
-    
+
     x = graph_kernel_op(feature_matrix,
                         source_indices,
                         target_indices,
@@ -265,31 +265,31 @@ def make_model(num_vertices = None,
     # Apply Reduction on Node Features
     #----------------------------------
 
-    average_vector = lbann.Constant(value = 1/num_vertices, 
+    average_vector = lbann.Constant(value = 1/num_vertices,
                                     num_neurons = str_list([1,num_vertices]),
                                     name="Average_Vector")
-    
-    x = lbann.MatMul(average_vector,x, name="Node_Feature_Reduction") 
-    
-    # X is now a vector with output_channel dimensions 
-    
+
+    x = lbann.MatMul(average_vector,x, name="Node_Feature_Reduction")
+
+    # X is now a vector with output_channel dimensions
+
     x = lbann.Reshape(x, dims = str_list([output_channels]), name = "Squeeze")
     x = lbann.FullyConnected(x, num_neurons = 64, name = "hidden_layer_1")
     x = lbann.Relu(x, name = "hidden_layer_1_activation")
     x = lbann.FullyConnected(x, num_neurons = num_classes,
                                 name="Output_Fully_Connected")
-    
+
     #----------------------------------
     # Loss Function and Accuracy s
     #----------------------------------
-    
-    
+
+
     probs = lbann.Softmax(x, name="Softmax")
     loss = lbann.CrossEntropy(probs, target, name="Cross_Entropy_Loss")
     accuracy = lbann.CategoricalAccuracy(probs, target, name="Accuracy")
 
     layers = lbann.traverse_layer_graph(input_)
-    
+
     if callbacks is None:
         print_model = lbann.CallbackPrintModelDescription() #Prints initial Model after Setup
         training_output = lbann.CallbackPrint( interval = 1,
@@ -303,10 +303,10 @@ def make_model(num_vertices = None,
 
     metrics = [lbann.Metric(accuracy, name='accuracy', unit="%")]
 
-    model = lbann.Model(num_epochs, 
+    model = lbann.Model(num_epochs,
                        layers = layers,
                        objective_function = loss,
-                       metrics = metrics, 
+                       metrics = metrics,
                        callbacks = callbacks
                        )
     return model
diff --git a/applications/graph/communityGAN/model/__init__.py b/applications/graph/communityGAN/model/__init__.py
index b9964861386..1cddcd0fe29 100644
--- a/applications/graph/communityGAN/model/__init__.py
+++ b/applications/graph/communityGAN/model/__init__.py
@@ -15,7 +15,7 @@ def make_model(
 
     # Layer graph
     input_ = lbann.Slice(
-        lbann.Input(),
+        lbann.Input(data_field='samples'),
         slice_points=str_list([0, motif_size, motif_size+walk_length]),
     )
     motif_indices = lbann.Identity(input_)
diff --git a/applications/graph/motif/model/__init__.py b/applications/graph/motif/model/__init__.py
index 0f0f101eee9..98cb7cf9383 100644
--- a/applications/graph/motif/model/__init__.py
+++ b/applications/graph/motif/model/__init__.py
@@ -8,7 +8,7 @@ def make_model(
 ):
 
     # Layer graph
-    data = lbann.Identity(lbann.Input())
+    data = lbann.Input(data_field='samples')
     autoencoder = model.autoencoder.FullyConnectedAutoencoder(
         data_dim,
         latent_dim,
diff --git a/applications/graph/node2vec/main.py b/applications/graph/node2vec/main.py
index 6a3b18cba84..c6a86c1bf6a 100644
--- a/applications/graph/node2vec/main.py
+++ b/applications/graph/node2vec/main.py
@@ -165,7 +165,7 @@
 
 # Embedding vectors, including negative sampling
 # Note: Input is sequence of vertex IDs
-input_ = lbann.Identity(lbann.Input())
+input_ = lbann.Input(data_field='samples')
 if args.embeddings == 'distributed':
     embeddings_weights = lbann.Weights(
         initializer=lbann.NormalInitializer(
diff --git a/applications/graph/node2vec/randproj.py b/applications/graph/node2vec/randproj.py
index f70bccb5159..2beca06fb42 100644
--- a/applications/graph/node2vec/randproj.py
+++ b/applications/graph/node2vec/randproj.py
@@ -115,7 +115,7 @@
 
 # Autoencoder
 # Note: Input is sequence of vertex IDs
-input_ = lbann.Identity(lbann.Input())
+input_ = lbann.Input(data_field='samples')
 proj = model.random_projection.random_projection(
     input_,
     sample_size,
diff --git a/applications/nlp/rnn/main.py b/applications/nlp/rnn/main.py
index 3aa00463919..426a978132d 100644
--- a/applications/nlp/rnn/main.py
+++ b/applications/nlp/rnn/main.py
@@ -45,7 +45,7 @@
 sequence_length = dataset.sample_dims()[0]
 
 # Input is a sequence of token IDs
-input_ = lbann.Identity(lbann.Input())
+input_ = lbann.Input(data_field='samples')
 input_slice = lbann.Slice(input_,
                           slice_points=str_list(range(sequence_length+1)))
 tokens_list = [lbann.Identity(input_slice) for _ in range(sequence_length)]
diff --git a/applications/nlp/transformer/subgraph/train.py b/applications/nlp/transformer/subgraph/train.py
index a824e1a3a3d..008dcd74a88 100644
--- a/applications/nlp/transformer/subgraph/train.py
+++ b/applications/nlp/transformer/subgraph/train.py
@@ -47,7 +47,7 @@ def make_model(
     )
 
     # Input is two sequences of token IDs
-    input_ = lbann.Identity(lbann.Input())
+    input_ = lbann.Input(data_field='samples')
 
     # Get sequences of embedding vectors
     # Note: Scale embeddings by sqrt(embed_dim).
diff --git a/applications/nlp/transformer/subgraph/train_infer.py b/applications/nlp/transformer/subgraph/train_infer.py
index cd59eebb7d0..f931cba6227 100644
--- a/applications/nlp/transformer/subgraph/train_infer.py
+++ b/applications/nlp/transformer/subgraph/train_infer.py
@@ -47,7 +47,7 @@ def make_model(
     )
 
     # Input is two sequences of token IDs
-    input_ = lbann.Identity(lbann.Input())
+    input_ = lbann.Input(data_field='samples')
 
     # Get sequences of embedding vectors
     # Note: Scale embeddings by sqrt(embed_dim).
@@ -250,5 +250,5 @@ def make_batch_script(trainer_params, model_params, script_params):
                        batch_job=False,)
                                    # **kwargs)
 
-   
+
     print(status)
diff --git a/applications/nlp/transformer/train.py b/applications/nlp/transformer/train.py
index 45c2eb3e237..f5a8dacf17e 100644
--- a/applications/nlp/transformer/train.py
+++ b/applications/nlp/transformer/train.py
@@ -37,7 +37,7 @@ def make_model(
     )
 
     # Input is two sequences of token IDs
-    input_ = lbann.Identity(lbann.Input())
+    input_ = lbann.Input(data_field='samples')
 
     # Get sequences of embedding vectors
     # Note: Scale embeddings by sqrt(embed_dim).
diff --git a/applications/optimizers/kfac/kfac.py b/applications/optimizers/kfac/kfac.py
index c1cc821323c..f4a751a9df3 100755
--- a/applications/optimizers/kfac/kfac.py
+++ b/applications/optimizers/kfac/kfac.py
@@ -92,9 +92,8 @@ def str_list(l):
 num_classes = 10
 
 # Input data
-input_ = lbann.Input(target_mode='classification')
-images = lbann.Identity(input_)
-labels = lbann.Identity(input_)
+images = lbann.Input(data_field='samples')
+labels = lbann.Input(data_field='labels')
 has_bias = False
 x = images
 
diff --git a/applications/physics/ICF/eval_macc_surrogate.py b/applications/physics/ICF/eval_macc_surrogate.py
index 0fadf7602a5..570915087f5 100644
--- a/applications/physics/ICF/eval_macc_surrogate.py
+++ b/applications/physics/ICF/eval_macc_surrogate.py
@@ -102,7 +102,7 @@ def construct_model():
     import lbann
 
     # Layer graph
-    input = lbann.Input(target_mode='N/A',name='inp_data')
+    input = lbann.Input(data_field='samples',name='inp_data')
     # data is 64*64*4 images + 15 scalar + 5 param
     inp_slice = lbann.Slice(input, axis=0, slice_points=str_list([0,args.ydim,args.ydim+args.xdim]),name='inp_slice')
     gt_y = lbann.Identity(inp_slice,name='gt_y')
diff --git a/applications/physics/ICF/pre_train_jag_wae.py b/applications/physics/ICF/pre_train_jag_wae.py
index a58598fcf50..ceb0bb8b630 100644
--- a/applications/physics/ICF/pre_train_jag_wae.py
+++ b/applications/physics/ICF/pre_train_jag_wae.py
@@ -88,7 +88,7 @@ def construct_model():
     import lbann
 
     # Layer graph
-    input = lbann.Input(target_mode='N/A', name='inp_data')
+    input = lbann.Input(data_field='samples', name='inp_data')
     # data is 64*64*4 images + 15 scalar + 5 param
     #inp_slice = lbann.Slice(input, axis=0, slice_points="0 16399 16404",name='inp_slice')
     inp_slice = lbann.Slice(input, axis=0, slice_points=str_list([0,args.ydim,args.ydim+5]),name='inp_slice')
diff --git a/applications/physics/ICF/train_jag_wae.py b/applications/physics/ICF/train_jag_wae.py
index b6f41e8c493..12266b58653 100644
--- a/applications/physics/ICF/train_jag_wae.py
+++ b/applications/physics/ICF/train_jag_wae.py
@@ -25,7 +25,7 @@ def construct_model():
     import lbann
 
     # Layer graph
-    input = lbann.Input(target_mode='N/A',name='inp_data')
+    input = lbann.Input(data_field='samples',name='inp_data')
     # data is 64*64*4 images + 15 scalar + 5 param
     inp_slice = lbann.Slice(input, axis=0, slice_points="0 16399 16404",name='inp_slice')
     gt_y = lbann.Identity(inp_slice,name='gt_y')
diff --git a/applications/physics/ICF/train_macc_surrogate.py b/applications/physics/ICF/train_macc_surrogate.py
index 0f6e9c27aee..f633d1fac9a 100644
--- a/applications/physics/ICF/train_macc_surrogate.py
+++ b/applications/physics/ICF/train_macc_surrogate.py
@@ -108,7 +108,7 @@ def construct_model():
     import lbann
 
     # Layer graph
-    input = lbann.Input(target_mode='N/A',name='inp_data')
+    input = lbann.Input(data_field='samples',name='inp_data')
     # data is 64*64*4 images + 15 scalar + 5 param
     inp_slice = lbann.Slice(input, axis=0, slice_points=str_list([0,args.ydim,args.ydim+args.xdim]),name='inp_slice')
     gt_y = lbann.Identity(inp_slice,name='gt_y')
diff --git a/applications/physics/cosmology/ExaGAN/train_exagan.py b/applications/physics/cosmology/ExaGAN/train_exagan.py
index d27999d82b4..ea11f600704 100644
--- a/applications/physics/cosmology/ExaGAN/train_exagan.py
+++ b/applications/physics/cosmology/ExaGAN/train_exagan.py
@@ -18,7 +18,7 @@ def construct_model():
     import lbann
 
     # Layer graph
-    input = lbann.Input(target_mode='N/A',name='inp_img')
+    input = lbann.Input(data_field='samples',name='inp_img')
     #label flipping
     label_flip_rand = lbann.Uniform(min=0,max=1, neuron_dims='1')
     label_flip_prob = lbann.Constant(value=0.01, num_neurons='1')
diff --git a/applications/physics/cosmology/cosmoflow/cosmoflow.py b/applications/physics/cosmology/cosmoflow/cosmoflow.py
index 9bab6c2137d..5379df966e2 100755
--- a/applications/physics/cosmology/cosmoflow/cosmoflow.py
+++ b/applications/physics/cosmology/cosmoflow/cosmoflow.py
@@ -335,10 +335,8 @@ def create_cosmoflow_data_reader(
     args = parser.parse_args()
 
     # Construct layer graph
-    input = lbann.Input(
-        target_mode='regression')
-    universes = lbann.Identity(input)
-    secrets = lbann.Identity(input)
+    universes = lbann.Input(data_field='samples')
+    secrets = lbann.Input(data_field='responses')
     statistics_group_size = 1 if args.local_batchnorm else -1
     preds = CosmoFlow(
         input_width=args.input_width,
@@ -347,7 +345,7 @@ def create_cosmoflow_data_reader(
         bn_statistics_group_size=statistics_group_size)(universes)
     mse = lbann.MeanSquaredError([preds, secrets])
     obj = lbann.ObjectiveFunction([mse])
-    layers = list(lbann.traverse_layer_graph(input))
+    layers = list(lbann.traverse_layer_graph([universes, secrets]))
 
     # Set parallel_strategy
     parallel_strategy = get_parallel_strategy_args(
diff --git a/applications/selfsupervised/classifier.py b/applications/selfsupervised/classifier.py
index 128be19d6cc..f870d06c312 100644
--- a/applications/selfsupervised/classifier.py
+++ b/applications/selfsupervised/classifier.py
@@ -18,9 +18,8 @@ def setup(data_reader_file,
           checkpoint_interval=None):
 
     # Setup input data
-    input = lbann.Input(target_mode = 'classification')
-    images = lbann.Identity(input)
-    labels = lbann.Identity(input)
+    images = lbann.Input(data_field='samples')
+    labels = lbann.Input(data_field='labels')
 
     # Classification network
     head_cnn = modules.ResNet(bn_statistics_group_size=bn_statistics_group_size)
@@ -34,7 +33,7 @@ def setup(data_reader_file,
     # Setup objective function
     cross_entropy = lbann.CrossEntropy([probs, labels])
     l2_reg_weights = set()
-    for l in lbann.traverse_layer_graph(input):
+    for l in lbann.traverse_layer_graph([images, labels]):
         if type(l) == lbann.Convolution or type(l) == lbann.FullyConnected:
             l2_reg_weights.update(l.weights)
     l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=0.0002)
diff --git a/applications/selfsupervised/pretrain_siamese.py b/applications/selfsupervised/pretrain_siamese.py
index a818f855126..2e0942ad761 100644
--- a/applications/selfsupervised/pretrain_siamese.py
+++ b/applications/selfsupervised/pretrain_siamese.py
@@ -22,7 +22,7 @@ def setup(num_patches=3,
     num_labels = patch_generator.num_labels(num_patches)
 
     # Extract tensors from data sample
-    input = lbann.Input()
+    input = lbann.Input(data_field='samples')
     slice_points = [0]
     for _ in range(num_patches):
         patch_size = functools.reduce(operator.mul, patch_dims)
diff --git a/applications/vision/alexnet.py b/applications/vision/alexnet.py
index a3abcac43e7..d26838ec303 100644
--- a/applications/vision/alexnet.py
+++ b/applications/vision/alexnet.py
@@ -33,15 +33,14 @@
 imagenet_labels = 1000
 
 # Construct layer graph
-input_ = lbann.Input(target_mode='classification')
-images = lbann.Identity(input_)
-labels = lbann.Identity(input_)
+images = lbann.Input(data_field='samples')
+labels = lbann.Input(data_field='labels')
 preds = lbann.models.AlexNet(imagenet_labels)(images)
 probs = lbann.Softmax(preds)
 cross_entropy = lbann.CrossEntropy(probs, labels)
 top1 = lbann.CategoricalAccuracy(probs, labels)
 top5 = lbann.TopKCategoricalAccuracy(probs, labels, k=5)
-layers = list(lbann.traverse_layer_graph(input_))
+layers = list(lbann.traverse_layer_graph([images, labels]))
 
 # Setup objective function
 weights = set()
diff --git a/applications/vision/densenet.py b/applications/vision/densenet.py
index 9b3cdbbcd83..679f60647e2 100644
--- a/applications/vision/densenet.py
+++ b/applications/vision/densenet.py
@@ -348,28 +348,6 @@ def get_args():
     return args
 
 
-def construct_layer_graph(
-        statistics_group_size,
-        version,
-        cumulative_layer_num,
-        input_node):
-    # Input data
-    images_node = lbann.Identity(input_node)
-    cumulative_layer_num += 1
-    log('Identity. cumulative_layer_num={n}'.format(n=cumulative_layer_num))
-
-    # Use input_node, not images_node.
-    image_labels_node = lbann.Identity(input_node)
-    cumulative_layer_num += 1
-    log('Identity. cumulative_layer_num={n}'.format(n=cumulative_layer_num))
-
-    # Use images_node, not image_labels_node.
-    probabilities = densenet(statistics_group_size, version,
-                             cumulative_layer_num, images_node)
-
-    return probabilities, image_labels_node
-
-
 def set_up_experiment(args,
                       input_,
                       probs,
@@ -444,20 +422,23 @@ def main():
     # Construct layer graph
     # ----------------------------------
 
-    input_node = lbann.Input(target_mode='classification')
+    images = lbann.Input(data_field='samples')
     # Start counting cumulative layers at 1.
     cumulative_layer_num = 1
-    log('Input. cumulative_layer_num={n}'.format(n=cumulative_layer_num))
-    (probs, labels) = construct_layer_graph(
-        args.procs_per_node,
-        121, cumulative_layer_num, input_node)
+    log('Input(datum). cumulative_layer_num={n}'.format(n=cumulative_layer_num))
+    labels = lbann.Input(data_field='labels')
+    cumulative_layer_num += 1
+    log('Input(labels). cumulative_layer_num={n}'.format(n=cumulative_layer_num))
+
+    probs = densenet(args.procs_per_node,
+        121, cumulative_layer_num, images)
 
     # ----------------------------------
     # Setup experiment
     # ----------------------------------
 
     (trainer, model, data_reader_proto, optimizer) = set_up_experiment(
-        args, input_node, probs, labels)
+        args, [images, labels], probs, labels)
 
     # ----------------------------------
     # Run experiment
diff --git a/applications/vision/lenet.py b/applications/vision/lenet.py
index 01c3e345e15..ff51f596ff6 100644
--- a/applications/vision/lenet.py
+++ b/applications/vision/lenet.py
@@ -21,9 +21,8 @@
 # ----------------------------------
 
 # Input data
-input_ = lbann.Input(target_mode='classification')
-images = lbann.Identity(input_)
-labels = lbann.Identity(input_)
+images = lbann.Input(data_field='samples')
+labels = lbann.Input(data_field='labels')
 
 # LeNet
 x = lbann.Convolution(images,
@@ -73,7 +72,7 @@
 mini_batch_size = 64
 num_epochs = 20
 model = lbann.Model(num_epochs,
-                    layers=lbann.traverse_layer_graph(input_),
+                    layers=lbann.traverse_layer_graph([images, labels]),
                     objective_function=loss,
                     metrics=[lbann.Metric(acc, name='accuracy', unit='%')],
                     callbacks=[lbann.CallbackPrintModelDescription(),
diff --git a/applications/vision/resnet.py b/applications/vision/resnet.py
index db9d2ccdc1e..48e5f480289 100644
--- a/applications/vision/resnet.py
+++ b/applications/vision/resnet.py
@@ -101,15 +101,14 @@
         width=args.width)
 
 # Construct layer graph
-input_ = lbann.Input(target_mode='classification')
-images = lbann.Identity(input_)
-labels = lbann.Identity(input_)
+images = lbann.Input(data_field='samples')
+labels = lbann.Input(data_field='labels')
 preds = resnet(images)
 probs = lbann.Softmax(preds)
 cross_entropy = lbann.CrossEntropy(probs, labels)
 top1 = lbann.CategoricalAccuracy(probs, labels)
 top5 = lbann.TopKCategoricalAccuracy(probs, labels, k=5)
-layers = list(lbann.traverse_layer_graph(input_))
+layers = list(lbann.traverse_layer_graph([images, labels]))
 
 # Setup tensor core operations (just to demonstrate enum usage)
 tensor_ops_mode = lbann.ConvTensorOpsMode.NO_TENSOR_OPS
diff --git a/bamboo/integration_tests/test_integration_alexnet.py b/bamboo/integration_tests/test_integration_alexnet.py
index d6d11b70ff5..9dd7b1c8c9c 100644
--- a/bamboo/integration_tests/test_integration_alexnet.py
+++ b/bamboo/integration_tests/test_integration_alexnet.py
@@ -74,9 +74,8 @@ def construct_model(lbann):
     import lbann.models
 
     # Layer graph
-    input_ = lbann.Input(target_mode='classification')
-    images = lbann.Identity(input_)
-    labels = lbann.Identity(input_)
+    images = lbann.Input(data_field='samples')
+    labels = lbann.Input(data_field='labels')
     x = lbann.models.AlexNet(1000)(images)
     probs = lbann.Softmax(x)
     cross_entropy = lbann.CrossEntropy(probs, labels)
diff --git a/bamboo/integration_tests/test_integration_atom_wae.py b/bamboo/integration_tests/test_integration_atom_wae.py
index 0db75d86594..a92b72ef94e 100644
--- a/bamboo/integration_tests/test_integration_atom_wae.py
+++ b/bamboo/integration_tests/test_integration_atom_wae.py
@@ -84,7 +84,7 @@ def construct_model(lbann):
 
     data_layout = "data_parallel"
     # Layer graph
-    input_ = lbann.Identity(lbann.Input(name='inp',target_mode="N/A"), name='inp1')
+    input_ = lbann.Input(name='inp', data_field="samples")
     wae_loss= []
     input_feature_dims = sequence_length
 
diff --git a/bamboo/integration_tests/test_integration_lenet.py b/bamboo/integration_tests/test_integration_lenet.py
index 6adad747422..4eb5fbf93bd 100644
--- a/bamboo/integration_tests/test_integration_lenet.py
+++ b/bamboo/integration_tests/test_integration_lenet.py
@@ -73,9 +73,8 @@ def construct_model(lbann):
     import lbann.models
 
     # Layer graph
-    input_ = lbann.Input(target_mode='classification')
-    images = lbann.Identity(input_)
-    labels = lbann.Identity(input_)
+    images = lbann.Input(data_field='samples')
+    labels = lbann.Input(data_field='labels')
     x = lbann.models.LeNet(10)(images)
     probs = lbann.Softmax(x)
     loss = lbann.CrossEntropy(probs, labels)
@@ -87,7 +86,7 @@ def construct_model(lbann):
 
     # Construct model
     return lbann.Model(num_epochs,
-                       layers=lbann.traverse_layer_graph(input_),
+                       layers=lbann.traverse_layer_graph([images, labels]),
                        objective_function=loss,
                        metrics=metrics,
                        callbacks=callbacks)
diff --git a/bamboo/integration_tests/test_integration_resnet50.py b/bamboo/integration_tests/test_integration_resnet50.py
index afb3b623b5f..68d6a7f6b60 100644
--- a/bamboo/integration_tests/test_integration_resnet50.py
+++ b/bamboo/integration_tests/test_integration_resnet50.py
@@ -72,9 +72,8 @@ def construct_model(lbann):
     import lbann.models
 
     # Layer graph
-    input_ = lbann.Input(target_mode='classification')
-    images = lbann.Identity(input_)
-    labels = lbann.Identity(input_)
+    images = lbann.Input(data_field='samples')
+    labels = lbann.Input(data_field='labels')
     x = lbann.models.ResNet50(1000, bn_statistics_group_size=-1)(images)
     probs = lbann.Softmax(x)
     cross_entropy = lbann.CrossEntropy(probs, labels)
diff --git a/bamboo/unit_tests/test_unit_algo_kfac.py b/bamboo/unit_tests/test_unit_algo_kfac.py
index 5aa1d75e7f3..859ef608aa5 100644
--- a/bamboo/unit_tests/test_unit_algo_kfac.py
+++ b/bamboo/unit_tests/test_unit_algo_kfac.py
@@ -118,7 +118,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_algo_ltfb.py b/bamboo/unit_tests/test_unit_algo_ltfb.py
index 232642b4e2a..e76f95d2947 100644
--- a/bamboo/unit_tests/test_unit_algo_ltfb.py
+++ b/bamboo/unit_tests/test_unit_algo_ltfb.py
@@ -90,7 +90,7 @@ def construct_model(lbann):
     # Layer graph
     weight = lbann.Weights(initializer=lbann.UniformInitializer(min=0, max=1))
     weight = lbann.WeightsLayer(weights=weight, dims=tools.str_list([1]))
-    rand = lbann.Identity(lbann.Input())
+    rand = lbann.Input(data_field='samples')
     layers = list(lbann.traverse_layer_graph([weight, rand]))
     for l in layers:
         l.device = 'CPU'
diff --git a/bamboo/unit_tests/test_unit_algo_ltfb_trunc_selection.py b/bamboo/unit_tests/test_unit_algo_ltfb_trunc_selection.py
index ac5a6eeb6d2..fa4601db770 100644
--- a/bamboo/unit_tests/test_unit_algo_ltfb_trunc_selection.py
+++ b/bamboo/unit_tests/test_unit_algo_ltfb_trunc_selection.py
@@ -1,7 +1,7 @@
 """Test to check truncation selection exchanges in LTFB.
 
-An LTFB round is performed after every training step and two (truncation_k=2) 
-winners chosen from higher random metric values, propagate their 
+An LTFB round is performed after every training step and two (truncation_k=2)
+winners chosen from higher random metric values, propagate their
 models/topologies to other trainers with lower metric values.
 The log files are post-processed to make sure that the correct weights
 are propagated by LTFB.
@@ -90,7 +90,7 @@ def construct_model(lbann):
     """
 
     # Layer graph
-    rand = lbann.Identity(lbann.Input())
+    rand = lbann.Input(data_field='samples')
     layers = list(lbann.traverse_layer_graph([rand]))
     for l in layers:
         l.device = 'CPU'
@@ -192,8 +192,8 @@ def func(cluster, dirname):
                         continue
                     sending_partner = [[] for _ in range(num_trainers)]
                     tournament_metrics = [[] for _ in range(num_trainers)]
-             
-                #sender 
+
+                #sender
                 match = re.search(
                     'In LTFB TSE .* '
                     'trainer ([0-9]+) with score .* sends model to trainer  ([0-9]+) '
@@ -202,7 +202,7 @@ def func(cluster, dirname):
                 if match:
                     trainer = int(match.group(1))
                     sending_partner[trainer].append(trainer) #ltfb_sender
-                
+
                 #receiver
                 match = re.search(
                     'In LTFB TSE .* '
@@ -213,7 +213,7 @@ def func(cluster, dirname):
                     receiver = looser = trainer = int(match.group(1))
                     sender = winner = partner = int(match.group(2))
                     sending_partner[trainer].append(sender) #ltfb_sender
-                
+
                 # Metric value on tournament (test) set
                 match = re.search(
                     'model0 \\(instance ([0-9]+)\\) test random : '
@@ -245,7 +245,7 @@ def func(cluster, dirname):
                   sender_at_step = sending_partner[trainer][step]
                   trainer_score = tournament_metrics[trainer][step]
                   winning_score = tournament_metrics[sender_at_step][step]
-                    
+
                   assert trainer_score <= winning_score, \
                       'Incorrect metric value for LTFB tournament'
 
diff --git a/bamboo/unit_tests/test_unit_callback_ltfb.py b/bamboo/unit_tests/test_unit_callback_ltfb.py
index eda37f096ca..24157f88546 100644
--- a/bamboo/unit_tests/test_unit_callback_ltfb.py
+++ b/bamboo/unit_tests/test_unit_callback_ltfb.py
@@ -77,7 +77,7 @@ def construct_model(lbann):
     # Layer graph
     weight = lbann.Weights(initializer=lbann.UniformInitializer(min=0, max=1))
     weight = lbann.WeightsLayer(weights=weight, dims=tools.str_list([1]))
-    rand = lbann.Identity(lbann.Input())
+    rand = lbann.Input(data_field='samples')
     layers = list(lbann.traverse_layer_graph([weight, rand]))
     for l in layers:
         l.device = 'CPU'
diff --git a/bamboo/unit_tests/test_unit_callback_ltfb_data.py b/bamboo/unit_tests/test_unit_callback_ltfb_data.py
index 96cff63871f..d7762383d0a 100644
--- a/bamboo/unit_tests/test_unit_callback_ltfb_data.py
+++ b/bamboo/unit_tests/test_unit_callback_ltfb_data.py
@@ -68,7 +68,7 @@ def construct_model(lbann):
     """
 
     # Layer graph
-    step_id = lbann.Identity(lbann.Input())
+    step_id = lbann.Input(data_field='samples')
     for l in lbann.traverse_layer_graph(step_id):
         l.device = 'CPU'
 
@@ -76,6 +76,7 @@ def construct_model(lbann):
     ltfb_interval = 3
     metrics = [lbann.Metric(step_id, name='step id')]
     callbacks = [
+        lbann.CallbackPrint(),
         lbann.CallbackLTFB(
             batch_interval=ltfb_interval,
             metric=metrics[-1].name,
diff --git a/bamboo/unit_tests/test_unit_callback_set_weights_value.py b/bamboo/unit_tests/test_unit_callback_set_weights_value.py
index bff955919e7..5c66fcdd5b2 100644
--- a/bamboo/unit_tests/test_unit_callback_set_weights_value.py
+++ b/bamboo/unit_tests/test_unit_callback_set_weights_value.py
@@ -67,7 +67,7 @@ def construct_model(lbann):
         callbacks.append(
             lbann.CallbackSetWeightsValue(weights=w.name, value=val, step=step)
         )
-    x_lbann = lbann.Identity(lbann.Input())
+    x_lbann = lbann.Input(data_field='samples')
     x = x_lbann
     y = lbann.WeightsLayer(weights=w, dims='1')
     z = lbann.Multiply(x, y)
diff --git a/bamboo/unit_tests/test_unit_checkpoint_lenet.py b/bamboo/unit_tests/test_unit_checkpoint_lenet.py
index aff3dec172b..2c2f54feab3 100644
--- a/bamboo/unit_tests/test_unit_checkpoint_lenet.py
+++ b/bamboo/unit_tests/test_unit_checkpoint_lenet.py
@@ -72,16 +72,15 @@ def construct_model(lbann):
     lbann.models.LeNet.global_count = 0
     lbann.Layer.global_count = 0
     # Layer graph
-    input_ = lbann.Input(target_mode='classification')
-    images = lbann.Identity(input_)
-    labels = lbann.Identity(input_)
+    images = lbann.Input(data_field='samples')
+    labels = lbann.Input(data_field='labels')
     x = lbann.models.LeNet(10)(images)
     probs = lbann.Softmax(x)
     loss = lbann.CrossEntropy(probs, labels)
     acc = lbann.CategoricalAccuracy(probs, labels)
 
     # Make sure all layers are on CPU
-    for layer in lbann.traverse_layer_graph(input_):
+    for layer in lbann.traverse_layer_graph([images, labels]):
         layer.device = 'cpu'
 
     # Objects for LBANN model
@@ -90,7 +89,7 @@ def construct_model(lbann):
 
     # Construct model
     return lbann.Model(num_epochs,
-                       layers=lbann.traverse_layer_graph(input_),
+                       layers=lbann.traverse_layer_graph([images, labels]),
                        objective_function=loss,
                        metrics=metrics,
                        callbacks=callbacks)
diff --git a/bamboo/unit_tests/test_unit_datareader_python.py b/bamboo/unit_tests/test_unit_datareader_python.py
index 2e7d2703c31..ae127a6dddb 100644
--- a/bamboo/unit_tests/test_unit_datareader_python.py
+++ b/bamboo/unit_tests/test_unit_datareader_python.py
@@ -57,7 +57,7 @@ def construct_model(lbann):
     """
 
     # Layer graph
-    x = lbann.Input()
+    x = lbann.Input(data_field='samples')
     y = lbann.L2Norm2(x)
     layers = list(lbann.traverse_layer_graph(x))
     metric = lbann.Metric(y, name='obj')
diff --git a/bamboo/unit_tests/test_unit_datastore_imagenet.py b/bamboo/unit_tests/test_unit_datastore_imagenet.py
index dcf28691e53..edc70d1499b 100644
--- a/bamboo/unit_tests/test_unit_datastore_imagenet.py
+++ b/bamboo/unit_tests/test_unit_datastore_imagenet.py
@@ -53,7 +53,7 @@ def construct_model(lbann):
     import lbann.models
 
     # Layer graph
-    input_ = lbann.Input()
+    input_ = lbann.Input(data_field='samples')
     x = lbann.Identity(input_)
     y = lbann.L2Norm2(x)
     z = lbann.Multiply(y, lbann.Sqrt(lbann.MiniBatchIndex()))
diff --git a/bamboo/unit_tests/test_unit_layer_argmax.py b/bamboo/unit_tests/test_unit_layer_argmax.py
index b8a432ad912..44a82ab6d2b 100644
--- a/bamboo/unit_tests/test_unit_layer_argmax.py
+++ b/bamboo/unit_tests/test_unit_layer_argmax.py
@@ -68,7 +68,7 @@ def l2_norm2(x):
         return np.inner(x, x)
 
     # LBANN implementation
-    x = lbann.Reshape(lbann.Input(), dims=tools.str_list(_sample_dims))
+    x = lbann.Reshape(lbann.Input(data_field='samples'), dims=tools.str_list(_sample_dims))
     y = lbann.Argmax(x, device='cpu')
     z = lbann.L2Norm2(y)
 
diff --git a/bamboo/unit_tests/test_unit_layer_argmin.py b/bamboo/unit_tests/test_unit_layer_argmin.py
index 8e21845f743..495c259bcf9 100644
--- a/bamboo/unit_tests/test_unit_layer_argmin.py
+++ b/bamboo/unit_tests/test_unit_layer_argmin.py
@@ -63,7 +63,7 @@ def construct_model(lbann):
     """
 
     # LBANN implementation
-    x = lbann.Reshape(lbann.Input(), dims=tools.str_list(_sample_dims))
+    x = lbann.Reshape(lbann.Input(data_field='samples'), dims=tools.str_list(_sample_dims))
     y = lbann.Argmin(x, device='cpu')
     z = lbann.L2Norm2(y)
 
diff --git a/bamboo/unit_tests/test_unit_layer_batch_normalization.py b/bamboo/unit_tests/test_unit_layer_batch_normalization.py
index b920d2226da..d5abf818efb 100644
--- a/bamboo/unit_tests/test_unit_layer_batch_normalization.py
+++ b/bamboo/unit_tests/test_unit_layer_batch_normalization.py
@@ -64,7 +64,7 @@ def construct_model(lbann):
     # object, construct a zero-valued tensor, and add it to the
     # input. To make sure that batchnorm is non-trivial, we multiply
     # the zero-valued tensor by the mini-batch index.
-    x = lbann.Reshape(lbann.Input(), dims=tools.str_list(_sample_dims))
+    x = lbann.Reshape(lbann.Input(data_field='samples'), dims=tools.str_list(_sample_dims))
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
diff --git a/bamboo/unit_tests/test_unit_layer_batched_matmul.py b/bamboo/unit_tests/test_unit_layer_batched_matmul.py
index 22bd371d2fa..34cbb20dea7 100644
--- a/bamboo/unit_tests/test_unit_layer_batched_matmul.py
+++ b/bamboo/unit_tests/test_unit_layer_batched_matmul.py
@@ -68,7 +68,7 @@ def construct_model(lbann):
     x1_weights = lbann.Weights(optimizer=lbann.SGD(),
                                initializer=lbann.ConstantInitializer(value=0.0),
                                name='input1_weights')
-    x_slice = lbann.Slice(lbann.Input(),
+    x_slice = lbann.Slice(lbann.Input(data_field='samples'),
                           slice_points=tools.str_list([0, _N*_m*_k, _N*_m*_k+_N*_k*_n]))
     x0 = lbann.Sum(x_slice,
                    lbann.WeightsLayer(weights=x0_weights, dims=str(_N*_m*_k)))
diff --git a/bamboo/unit_tests/test_unit_layer_batchwise_reduce_sum.py b/bamboo/unit_tests/test_unit_layer_batchwise_reduce_sum.py
index 7ec7814b244..33ee66d3089 100644
--- a/bamboo/unit_tests/test_unit_layer_batchwise_reduce_sum.py
+++ b/bamboo/unit_tests/test_unit_layer_batchwise_reduce_sum.py
@@ -69,7 +69,7 @@ def construct_model(lbann):
         name='input_weights'
     )
     x = lbann.Multiply(
-        lbann.Input(),
+        lbann.Input(data_field='samples'),
         lbann.WeightsLayer(weights=x_weights, dims=tools.str_list(_sample_size)),
     )
 
diff --git a/bamboo/unit_tests/test_unit_layer_channelwise_fully_connected.py b/bamboo/unit_tests/test_unit_layer_channelwise_fully_connected.py
index 85672d726ca..b9b9f5f0c3c 100644
--- a/bamboo/unit_tests/test_unit_layer_channelwise_fully_connected.py
+++ b/bamboo/unit_tests/test_unit_layer_channelwise_fully_connected.py
@@ -68,7 +68,7 @@ def construct_model(lbann):
                               name='input_weights')
     x0 = lbann.WeightsLayer(weights=x_weights,
                             dims=tools.str_list(_sample_dims))
-    x1 = lbann.Reshape(lbann.Input(), dims=tools.str_list(_sample_dims))
+    x1 = lbann.Reshape(lbann.Input(data_field='samples'), dims=tools.str_list(_sample_dims))
     x = lbann.Sum(x0, x1)
     x_lbann = x
 
diff --git a/bamboo/unit_tests/test_unit_layer_channelwise_fully_connected_distconv.py b/bamboo/unit_tests/test_unit_layer_channelwise_fully_connected_distconv.py
index 58a202d8e26..0b0465ef1ab 100644
--- a/bamboo/unit_tests/test_unit_layer_channelwise_fully_connected_distconv.py
+++ b/bamboo/unit_tests/test_unit_layer_channelwise_fully_connected_distconv.py
@@ -4,7 +4,7 @@
 import os.path
 import sys
 import numpy as np
-import pytest 
+import pytest
 
 # Bamboo utilities
 current_file = os.path.realpath(__file__)
@@ -79,7 +79,7 @@ def construct_model(lbann):
                               name='input_weights')
     x0 = lbann.WeightsLayer(weights=x_weights,
                             dims=tools.str_list(_sample_dims))
-    x1 = lbann.Reshape(lbann.Input(), dims=tools.str_list(_sample_dims), name="Input_layer")
+    x1 = lbann.Reshape(lbann.Input(data_field='samples'), dims=tools.str_list(_sample_dims), name="Input_layer")
     x = lbann.Sum(x0, x1, name="Adding_weight_layer")
     x_lbann = x
 
diff --git a/bamboo/unit_tests/test_unit_layer_channelwise_gru_cell.py b/bamboo/unit_tests/test_unit_layer_channelwise_gru_cell.py
index decc86331e7..1c4c502d91c 100644
--- a/bamboo/unit_tests/test_unit_layer_channelwise_gru_cell.py
+++ b/bamboo/unit_tests/test_unit_layer_channelwise_gru_cell.py
@@ -105,7 +105,7 @@ def construct_model(lbann):
                               name='input')
     h_weights = lbann.Weights(initializer=lbann.ConstantInitializer(value=0.0),
                               name='inital_hidden')
-    input_ = lbann.Identity(lbann.Input())
+    input_ = lbann.Input(data_field='samples')
     input_slice = lbann.Slice(
         input_,
         slice_points=tools.str_list([0, _num_channels*_input_size, _sample_size]),
diff --git a/bamboo/unit_tests/test_unit_layer_channelwise_scale_bias.py b/bamboo/unit_tests/test_unit_layer_channelwise_scale_bias.py
index ef68595500f..bf75f3ef580 100644
--- a/bamboo/unit_tests/test_unit_layer_channelwise_scale_bias.py
+++ b/bamboo/unit_tests/test_unit_layer_channelwise_scale_bias.py
@@ -68,7 +68,7 @@ def construct_model(lbann):
                               name='input_weights')
     x0 = lbann.WeightsLayer(weights=x_weights,
                             dims=tools.str_list(_sample_dims))
-    x1 = lbann.Reshape(lbann.Input(), dims=tools.str_list(_sample_dims))
+    x1 = lbann.Reshape(lbann.Input(data_field='samples'), dims=tools.str_list(_sample_dims))
     x = lbann.Sum(x0, x1)
 
     # Apply channel-wise scale/bias
diff --git a/bamboo/unit_tests/test_unit_layer_channelwise_softmax.py b/bamboo/unit_tests/test_unit_layer_channelwise_softmax.py
index fce94d8dba3..0090f6d5d39 100644
--- a/bamboo/unit_tests/test_unit_layer_channelwise_softmax.py
+++ b/bamboo/unit_tests/test_unit_layer_channelwise_softmax.py
@@ -76,7 +76,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_layer_clamp.py b/bamboo/unit_tests/test_unit_layer_clamp.py
index 21460b26bb0..ba8be6a3585 100644
--- a/bamboo/unit_tests/test_unit_layer_clamp.py
+++ b/bamboo/unit_tests/test_unit_layer_clamp.py
@@ -69,7 +69,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_concatenate.py b/bamboo/unit_tests/test_unit_layer_concatenate.py
index 1696e70d32f..4f36128f755 100644
--- a/bamboo/unit_tests/test_unit_layer_concatenate.py
+++ b/bamboo/unit_tests/test_unit_layer_concatenate.py
@@ -64,7 +64,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Input(),
+    x = lbann.Sum(lbann.Input(data_field='samples'),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
     x_lbann = x
diff --git a/bamboo/unit_tests/test_unit_layer_convolution.py b/bamboo/unit_tests/test_unit_layer_convolution.py
index 380a5045839..27362515cf6 100644
--- a/bamboo/unit_tests/test_unit_layer_convolution.py
+++ b/bamboo/unit_tests/test_unit_layer_convolution.py
@@ -138,7 +138,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_layer_convolution_distconv.py b/bamboo/unit_tests/test_unit_layer_convolution_distconv.py
index f46a711ce6a..d1c5ab7ca5d 100644
--- a/bamboo/unit_tests/test_unit_layer_convolution_distconv.py
+++ b/bamboo/unit_tests/test_unit_layer_convolution_distconv.py
@@ -143,7 +143,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_layer_covariance.py b/bamboo/unit_tests/test_unit_layer_covariance.py
index c36dff46d87..b6a54927521 100644
--- a/bamboo/unit_tests/test_unit_layer_covariance.py
+++ b/bamboo/unit_tests/test_unit_layer_covariance.py
@@ -65,7 +65,7 @@ def construct_model(lbann):
     x1_weights = lbann.Weights(optimizer=lbann.SGD(),
                                initializer=lbann.ConstantInitializer(value=0.0),
                                name='input1_weights')
-    x_slice = lbann.Slice(lbann.Input(),
+    x_slice = lbann.Slice(lbann.Input(data_field='samples'),
                           slice_points=tools.str_list([0, slice_size, 2*slice_size]))
     x0 = lbann.Sum(x_slice,
                    lbann.WeightsLayer(weights=x0_weights, dims=str(slice_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_cross_entropy.py b/bamboo/unit_tests/test_unit_layer_cross_entropy.py
index 3b81f90176b..dace84256e2 100644
--- a/bamboo/unit_tests/test_unit_layer_cross_entropy.py
+++ b/bamboo/unit_tests/test_unit_layer_cross_entropy.py
@@ -94,7 +94,7 @@ def construct_model(lbann):
     x1_weights = lbann.Weights(optimizer=lbann.SGD(),
                                initializer=lbann.ConstantInitializer(value=0.0),
                                name='input1_weights')
-    x_slice = lbann.Slice(lbann.Input(),
+    x_slice = lbann.Slice(lbann.Input(data_field='samples'),
                           slice_points=tools.str_list([0, slice_size, 2*slice_size]))
     x0 = lbann.Sum(x_slice,
                    lbann.WeightsLayer(weights=x0_weights, dims=str(slice_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_dft_abs.py b/bamboo/unit_tests/test_unit_layer_dft_abs.py
index 96bbe5124c4..f984e05e98e 100644
--- a/bamboo/unit_tests/test_unit_layer_dft_abs.py
+++ b/bamboo/unit_tests/test_unit_layer_dft_abs.py
@@ -97,7 +97,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_layer_dist_embedding.py b/bamboo/unit_tests/test_unit_layer_dist_embedding.py
index 303692cb910..052109c5c9d 100644
--- a/bamboo/unit_tests/test_unit_layer_dist_embedding.py
+++ b/bamboo/unit_tests/test_unit_layer_dist_embedding.py
@@ -59,7 +59,7 @@ def construct_model(lbann):
     """
 
     # Input data
-    x = lbann.Identity(lbann.Input())
+    x = lbann.Identity(lbann.Input(data_field='samples'))
     x_lbann = x
 
     # Objects for LBANN model
diff --git a/bamboo/unit_tests/test_unit_layer_elu.py b/bamboo/unit_tests/test_unit_layer_elu.py
index d64471eebe4..6391db4b8ce 100644
--- a/bamboo/unit_tests/test_unit_layer_elu.py
+++ b/bamboo/unit_tests/test_unit_layer_elu.py
@@ -67,7 +67,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_embedding.py b/bamboo/unit_tests/test_unit_layer_embedding.py
index ea0738d5888..e2bce911218 100644
--- a/bamboo/unit_tests/test_unit_layer_embedding.py
+++ b/bamboo/unit_tests/test_unit_layer_embedding.py
@@ -58,7 +58,7 @@ def construct_model(lbann):
     """
 
     # Input data
-    x = lbann.Identity(lbann.Input())
+    x = lbann.Input(data_field='samples')
     x_lbann = x
 
     # Objects for LBANN model
diff --git a/bamboo/unit_tests/test_unit_layer_entrywise_batch_normalization.py b/bamboo/unit_tests/test_unit_layer_entrywise_batch_normalization.py
index b8565923752..d6b7c63536e 100644
--- a/bamboo/unit_tests/test_unit_layer_entrywise_batch_normalization.py
+++ b/bamboo/unit_tests/test_unit_layer_entrywise_batch_normalization.py
@@ -64,7 +64,7 @@ def construct_model(lbann):
     # object, construct a zero-valued tensor, and add it to the
     # input. To make sure that batchnorm is non-trivial, we multiply
     # the zero-valued tensor by the mini-batch index.
-    x = lbann.Reshape(lbann.Input(), dims=tools.str_list(_sample_dims))
+    x = lbann.Reshape(lbann.Input(data_field='samples'), dims=tools.str_list(_sample_dims))
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
diff --git a/bamboo/unit_tests/test_unit_layer_entrywise_scale_bias.py b/bamboo/unit_tests/test_unit_layer_entrywise_scale_bias.py
index 05284496b74..7d28aaf3a8e 100644
--- a/bamboo/unit_tests/test_unit_layer_entrywise_scale_bias.py
+++ b/bamboo/unit_tests/test_unit_layer_entrywise_scale_bias.py
@@ -66,7 +66,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_erf.py b/bamboo/unit_tests/test_unit_layer_erf.py
index 5ae6f6f55d9..e1a15aa939f 100644
--- a/bamboo/unit_tests/test_unit_layer_erf.py
+++ b/bamboo/unit_tests/test_unit_layer_erf.py
@@ -64,7 +64,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_erfinv.py b/bamboo/unit_tests/test_unit_layer_erfinv.py
index f50adad4d61..7d732476700 100644
--- a/bamboo/unit_tests/test_unit_layer_erfinv.py
+++ b/bamboo/unit_tests/test_unit_layer_erfinv.py
@@ -65,7 +65,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_fully_connected.py b/bamboo/unit_tests/test_unit_layer_fully_connected.py
index b05355738b3..d2540f3a7ab 100644
--- a/bamboo/unit_tests/test_unit_layer_fully_connected.py
+++ b/bamboo/unit_tests/test_unit_layer_fully_connected.py
@@ -64,7 +64,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_input_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_input_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_gather.py b/bamboo/unit_tests/test_unit_layer_gather.py
index 3eab0bb753d..c7c3ed59ab6 100644
--- a/bamboo/unit_tests/test_unit_layer_gather.py
+++ b/bamboo/unit_tests/test_unit_layer_gather.py
@@ -67,7 +67,7 @@ def construct_model(lbann):
     # Input data
     # Note: Sum with a weights layer so that gradient checking will
     # verify that error signals are correct.
-    x = lbann.Identity(lbann.Input())
+    x = lbann.Input(data_field='samples')
     x_slice = lbann.Slice(
         x,
         slice_points=tools.str_list([0,input_size,input_size+output_size]),
@@ -127,12 +127,12 @@ def construct_model(lbann):
 
     ######################################################################
     #
-    #          2D Values , 1D Input, Axis = 0 
+    #          2D Values , 1D Input, Axis = 0
     #
     ######################################################################
 
     x0 = lbann.Reshape(x0_lbann, dims=tools.str_list([num_rows, num_columns]))
-    
+
     x1 = lbann.Identity(x1_lbann, name="indices_2D_axis_0")
 
     y0 = lbann.Gather(x0,x1, name="Gather_2D_axis_0", axis=0)
@@ -179,12 +179,12 @@ def construct_model(lbann):
 
     ######################################################################
     #
-    #          2D Values , 1D Input, Axis = 1 
+    #          2D Values , 1D Input, Axis = 1
     #
     ######################################################################
 
     x0 = lbann.Reshape(x0_lbann, dims=tools.str_list([num_rows, num_columns]))
-    
+
     x1 = lbann.Identity(x1_lbann, name="Indices_2D")
 
     y0 = lbann.Gather(x0,x1, name="Gather_2D", axis=1)
diff --git a/bamboo/unit_tests/test_unit_layer_gru.py b/bamboo/unit_tests/test_unit_layer_gru.py
index 09a9bb042a7..e8e88f31754 100644
--- a/bamboo/unit_tests/test_unit_layer_gru.py
+++ b/bamboo/unit_tests/test_unit_layer_gru.py
@@ -114,7 +114,7 @@ def construct_model(lbann):
                               name='input')
     h_weights = lbann.Weights(initializer=lbann.ConstantInitializer(value=0.0),
                               name='inital_hidden')
-    input_ = lbann.Identity(lbann.Input())
+    input_ = lbann.Input(data_field='samples')
     input_slice = lbann.Slice(
         input_,
         slice_points=tools.str_list([0, _sequence_length*_input_size, _sample_size]),
diff --git a/bamboo/unit_tests/test_unit_layer_identity.py b/bamboo/unit_tests/test_unit_layer_identity.py
index f860a0d6766..e3e3589edcd 100644
--- a/bamboo/unit_tests/test_unit_layer_identity.py
+++ b/bamboo/unit_tests/test_unit_layer_identity.py
@@ -63,7 +63,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_identity_distconv.py b/bamboo/unit_tests/test_unit_layer_identity_distconv.py
index 59200806bcc..5fa80060ded 100644
--- a/bamboo/unit_tests/test_unit_layer_identity_distconv.py
+++ b/bamboo/unit_tests/test_unit_layer_identity_distconv.py
@@ -67,7 +67,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_instance_norm.py b/bamboo/unit_tests/test_unit_layer_instance_norm.py
index 56e3101bdaf..b207b77743a 100644
--- a/bamboo/unit_tests/test_unit_layer_instance_norm.py
+++ b/bamboo/unit_tests/test_unit_layer_instance_norm.py
@@ -76,7 +76,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_layer_l1_norm.py b/bamboo/unit_tests/test_unit_layer_l1_norm.py
index 4f0c3ff52e1..4820fa6034f 100644
--- a/bamboo/unit_tests/test_unit_layer_l1_norm.py
+++ b/bamboo/unit_tests/test_unit_layer_l1_norm.py
@@ -67,7 +67,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_layer_norm.py b/bamboo/unit_tests/test_unit_layer_layer_norm.py
index 1b694fa9215..a1bc00fef84 100644
--- a/bamboo/unit_tests/test_unit_layer_layer_norm.py
+++ b/bamboo/unit_tests/test_unit_layer_layer_norm.py
@@ -74,7 +74,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_leaky_relu.py b/bamboo/unit_tests/test_unit_layer_leaky_relu.py
index 655756ac454..d05b8540955 100644
--- a/bamboo/unit_tests/test_unit_layer_leaky_relu.py
+++ b/bamboo/unit_tests/test_unit_layer_leaky_relu.py
@@ -67,7 +67,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_leaky_relu_distconv.py b/bamboo/unit_tests/test_unit_layer_leaky_relu_distconv.py
index e5d08ebedcc..d70725e9163 100644
--- a/bamboo/unit_tests/test_unit_layer_leaky_relu_distconv.py
+++ b/bamboo/unit_tests/test_unit_layer_leaky_relu_distconv.py
@@ -71,7 +71,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_log_sigmoid.py b/bamboo/unit_tests/test_unit_layer_log_sigmoid.py
index 21a17e0ba70..af81d0350ea 100644
--- a/bamboo/unit_tests/test_unit_layer_log_sigmoid.py
+++ b/bamboo/unit_tests/test_unit_layer_log_sigmoid.py
@@ -65,7 +65,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_log_softmax.py b/bamboo/unit_tests/test_unit_layer_log_softmax.py
index 57c3e5066dd..4d3ffe36d72 100644
--- a/bamboo/unit_tests/test_unit_layer_log_softmax.py
+++ b/bamboo/unit_tests/test_unit_layer_log_softmax.py
@@ -78,7 +78,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_matmul.py b/bamboo/unit_tests/test_unit_layer_matmul.py
index 025dccb89ce..fe866d999ec 100644
--- a/bamboo/unit_tests/test_unit_layer_matmul.py
+++ b/bamboo/unit_tests/test_unit_layer_matmul.py
@@ -67,7 +67,7 @@ def construct_model(lbann):
     x1_weights = lbann.Weights(optimizer=lbann.SGD(),
                                initializer=lbann.ConstantInitializer(value=0.0),
                                name='input1_weights')
-    x_slice = lbann.Slice(lbann.Input(),
+    x_slice = lbann.Slice(lbann.Input(data_field='samples'),
                           slice_points=tools.str_list([0, _m*_k, _m*_k+_k*_n]))
     x0 = lbann.Sum(x_slice,
                    lbann.WeightsLayer(weights=x0_weights, dims=str(_m*_k)))
diff --git a/bamboo/unit_tests/test_unit_layer_mean_absolute_error.py b/bamboo/unit_tests/test_unit_layer_mean_absolute_error.py
index 1ed30def90c..91ca5cb64a2 100644
--- a/bamboo/unit_tests/test_unit_layer_mean_absolute_error.py
+++ b/bamboo/unit_tests/test_unit_layer_mean_absolute_error.py
@@ -68,7 +68,7 @@ def construct_model(lbann):
     x1_weights = lbann.Weights(optimizer=lbann.SGD(),
                                initializer=lbann.ConstantInitializer(value=0.0),
                                name='input1_weights')
-    x_slice = lbann.Slice(lbann.Input(),
+    x_slice = lbann.Slice(lbann.Input(data_field='samples'),
                           slice_points=tools.str_list([0, slice_size, 2*slice_size]))
     x0 = lbann.Sum(x_slice,
                    lbann.WeightsLayer(weights=x0_weights, dims=str(slice_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_mean_squared_error.py b/bamboo/unit_tests/test_unit_layer_mean_squared_error.py
index 7276c8f9477..8c34a20eefa 100644
--- a/bamboo/unit_tests/test_unit_layer_mean_squared_error.py
+++ b/bamboo/unit_tests/test_unit_layer_mean_squared_error.py
@@ -65,7 +65,7 @@ def construct_model(lbann):
     x1_weights = lbann.Weights(optimizer=lbann.SGD(),
                                initializer=lbann.ConstantInitializer(value=0.0),
                                name='input1_weights')
-    x_slice = lbann.Slice(lbann.Input(),
+    x_slice = lbann.Slice(lbann.Input(data_field='samples'),
                           slice_points=tools.str_list([0, slice_size, 2*slice_size]))
     x0 = lbann.Sum(x_slice,
                    lbann.WeightsLayer(weights=x0_weights, dims=str(slice_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_one_hot.py b/bamboo/unit_tests/test_unit_layer_one_hot.py
index ae16933ba4b..3955ecd35fe 100644
--- a/bamboo/unit_tests/test_unit_layer_one_hot.py
+++ b/bamboo/unit_tests/test_unit_layer_one_hot.py
@@ -58,7 +58,7 @@ def construct_model(lbann):
     """
 
     # Input data
-    x_lbann = lbann.Identity(lbann.Input())
+    x_lbann = lbann.Input(data_field='samples')
     y_numpy = np.random.normal(size=one_hot_size).astype(np.float32)
     y_numpy[:] = 1 ### @todo Remove
     y_lbann = lbann.Weights(
diff --git a/bamboo/unit_tests/test_unit_layer_pooling.py b/bamboo/unit_tests/test_unit_layer_pooling.py
index 63d19d5518a..bb858bca7bc 100644
--- a/bamboo/unit_tests/test_unit_layer_pooling.py
+++ b/bamboo/unit_tests/test_unit_layer_pooling.py
@@ -124,7 +124,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_layer_pooling_distconv.py b/bamboo/unit_tests/test_unit_layer_pooling_distconv.py
index 7ea246ca1d7..37d58ae6ae9 100644
--- a/bamboo/unit_tests/test_unit_layer_pooling_distconv.py
+++ b/bamboo/unit_tests/test_unit_layer_pooling_distconv.py
@@ -128,7 +128,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_layer_reduction.py b/bamboo/unit_tests/test_unit_layer_reduction.py
index 9bf12a006c9..17eef8bb62b 100644
--- a/bamboo/unit_tests/test_unit_layer_reduction.py
+++ b/bamboo/unit_tests/test_unit_layer_reduction.py
@@ -61,7 +61,7 @@ def construct_model(lbann):
     # Note: Slice to separate the last entry in the input tensor. Sum
     # with a weights layer so that gradient checking will verify that
     # error signals are correct.
-    x = lbann.Identity(lbann.Input())
+    x = lbann.Input(data_field='samples')
     x = lbann.Slice(x, slice_points=tools.str_list([0,_sample_size-1,_sample_size]))
     x1 = lbann.Identity(x)
     x2 = lbann.Identity(x)
diff --git a/bamboo/unit_tests/test_unit_layer_relu.py b/bamboo/unit_tests/test_unit_layer_relu.py
index 8354ec2e27b..c7dd85339cc 100644
--- a/bamboo/unit_tests/test_unit_layer_relu.py
+++ b/bamboo/unit_tests/test_unit_layer_relu.py
@@ -67,7 +67,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_relu_distconv.py b/bamboo/unit_tests/test_unit_layer_relu_distconv.py
index 4eab8b07fde..95a0480d4df 100644
--- a/bamboo/unit_tests/test_unit_layer_relu_distconv.py
+++ b/bamboo/unit_tests/test_unit_layer_relu_distconv.py
@@ -71,7 +71,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_rowwise_weights_norms.py b/bamboo/unit_tests/test_unit_layer_rowwise_weights_norms.py
index 18c78a3ee86..af19842d6d7 100644
--- a/bamboo/unit_tests/test_unit_layer_rowwise_weights_norms.py
+++ b/bamboo/unit_tests/test_unit_layer_rowwise_weights_norms.py
@@ -63,7 +63,7 @@ def construct_model(lbann):
     mat = np.random.normal(size=(height, width)).astype(np.float32)
 
     # Input data
-    x_lbann = lbann.Identity(lbann.Input())
+    x_lbann = lbann.Input(data_field='samples')
 
     # Objects for LBANN model
     layers = [x_lbann]
diff --git a/bamboo/unit_tests/test_unit_layer_scatter.py b/bamboo/unit_tests/test_unit_layer_scatter.py
index e1e44956469..36e5ddb5eba 100644
--- a/bamboo/unit_tests/test_unit_layer_scatter.py
+++ b/bamboo/unit_tests/test_unit_layer_scatter.py
@@ -20,7 +20,7 @@
 # Data
 width = 13
 height = 7
-input_size = width * height 
+input_size = width * height
 output_size = 23
 seed = 20210127
 
@@ -67,7 +67,7 @@ def construct_model(lbann):
     # Input data
     # Note: Sum with a weights layer so that gradient checking will
     # verify that error signals are correct.
-    x = lbann.Identity(lbann.Input())
+    x = lbann.Input(data_field='samples')
     x_slice = lbann.Slice(
         x,
         slice_points=tools.str_list([0,input_size,2*input_size]),
@@ -97,7 +97,7 @@ def construct_model(lbann):
 
     # Objects for LBANN model
 
-    
+
     obj = []
     metrics = []
     callbacks = []
@@ -131,7 +131,7 @@ def construct_model(lbann):
 
     ######################################################################
     #
-    #          2D Values , 1D Input, Axis = 0 
+    #          2D Values , 1D Input, Axis = 0
     #
     ######################################################################
 
@@ -156,7 +156,7 @@ def construct_model(lbann):
     obj.append(z)
     metrics.append(lbann.Metric(z, name='2D, axis=0'))
 
-    vals = [] 
+    vals = []
 
     for i in range(num_samples()):
         _x = get_sample(i)
@@ -167,9 +167,9 @@ def construct_model(lbann):
 
         for i in range(height):
             if 0 <= x1[i] < output_size:
-                for j in range(width):                
+                for j in range(width):
                     y0[int(x1[i])][j] += x0[i][j]
-        z = 0 
+        z = 0
         for i in range(width * output_size):
             z += ((i + 1) * y0.flatten()[i])**2
         vals.append(z)
@@ -183,7 +183,7 @@ def construct_model(lbann):
         execution_modes='test'))
     ######################################################################
     #
-    #          2D Values , 1D Input, Axis = 1 
+    #          2D Values , 1D Input, Axis = 1
     #
     ######################################################################
 
@@ -208,7 +208,7 @@ def construct_model(lbann):
     obj.append(z)
     metrics.append(lbann.Metric(z, name='2D, axis=1'))
 
-    vals = [] 
+    vals = []
 
     for i in range(num_samples()):
         _x = get_sample(i)
@@ -221,7 +221,7 @@ def construct_model(lbann):
             for j in range(height):
                 if 0 <= x1[i] < output_size:
                     y0[j][int(x1[i])] += x0[j][i]
-        z = 0 
+        z = 0
         for i in range(height * output_size):
             z += ((i + 1) * y0.flatten()[i])**2
         vals.append(z)
@@ -234,7 +234,7 @@ def construct_model(lbann):
         error_on_failure=True,
         execution_modes='test'))
     # Gradient checking
-    
+
     callbacks.append(lbann.CallbackCheckGradients(error_on_failure=True))
 
     # Construct model
diff --git a/bamboo/unit_tests/test_unit_layer_selu.py b/bamboo/unit_tests/test_unit_layer_selu.py
index cde6bf0b53f..434dccca839 100644
--- a/bamboo/unit_tests/test_unit_layer_selu.py
+++ b/bamboo/unit_tests/test_unit_layer_selu.py
@@ -83,7 +83,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_sigmoid.py b/bamboo/unit_tests/test_unit_layer_sigmoid.py
index b63a35ccf7d..1815e26c1d4 100644
--- a/bamboo/unit_tests/test_unit_layer_sigmoid.py
+++ b/bamboo/unit_tests/test_unit_layer_sigmoid.py
@@ -65,7 +65,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_sigmoid_binary_cross_entropy.py b/bamboo/unit_tests/test_unit_layer_sigmoid_binary_cross_entropy.py
index 649922a8aed..2fdc059b0e1 100644
--- a/bamboo/unit_tests/test_unit_layer_sigmoid_binary_cross_entropy.py
+++ b/bamboo/unit_tests/test_unit_layer_sigmoid_binary_cross_entropy.py
@@ -68,7 +68,7 @@ def construct_model(lbann):
     x1_weights = lbann.Weights(optimizer=lbann.SGD(),
                                initializer=lbann.ConstantInitializer(value=0.0),
                                name='input1_weights')
-    x_slice = lbann.Slice(lbann.Input(),
+    x_slice = lbann.Slice(lbann.Input(data_field='samples'),
                           slice_points=tools.str_list([0, slice_size, 2*slice_size]))
     x0 = lbann.Sum(x_slice,
                    lbann.WeightsLayer(weights=x0_weights, dims=str(slice_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_slice.py b/bamboo/unit_tests/test_unit_layer_slice.py
index a3b8d480ecb..fc3a4e2e474 100644
--- a/bamboo/unit_tests/test_unit_layer_slice.py
+++ b/bamboo/unit_tests/test_unit_layer_slice.py
@@ -65,7 +65,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_layer_softmax.py b/bamboo/unit_tests/test_unit_layer_softmax.py
index 73188ba1ccd..37d77cffcc3 100644
--- a/bamboo/unit_tests/test_unit_layer_softmax.py
+++ b/bamboo/unit_tests/test_unit_layer_softmax.py
@@ -79,7 +79,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_softplus.py b/bamboo/unit_tests/test_unit_layer_softplus.py
index 3e4df8127d4..a11e3883754 100644
--- a/bamboo/unit_tests/test_unit_layer_softplus.py
+++ b/bamboo/unit_tests/test_unit_layer_softplus.py
@@ -65,7 +65,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_softsign.py b/bamboo/unit_tests/test_unit_layer_softsign.py
index f9198d07da5..26e9972bb16 100644
--- a/bamboo/unit_tests/test_unit_layer_softsign.py
+++ b/bamboo/unit_tests/test_unit_layer_softsign.py
@@ -63,7 +63,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_squared_difference.py b/bamboo/unit_tests/test_unit_layer_squared_difference.py
index 8a5dd2bdb3e..a779e6de9f7 100644
--- a/bamboo/unit_tests/test_unit_layer_squared_difference.py
+++ b/bamboo/unit_tests/test_unit_layer_squared_difference.py
@@ -65,7 +65,7 @@ def construct_model(lbann):
     x1_weights = lbann.Weights(optimizer=lbann.SGD(),
                                initializer=lbann.ConstantInitializer(value=0.0),
                                name='input1_weights')
-    x_slice = lbann.Slice(lbann.Input(),
+    x_slice = lbann.Slice(lbann.Input(data_field='samples'),
                           slice_points=tools.str_list([0, slice_size, 2*slice_size]))
     x0 = lbann.Sum(x_slice,
                    lbann.WeightsLayer(weights=x0_weights, dims=str(slice_size)))
diff --git a/bamboo/unit_tests/test_unit_layer_tessellate.py b/bamboo/unit_tests/test_unit_layer_tessellate.py
index 94da3303861..15db12975a9 100644
--- a/bamboo/unit_tests/test_unit_layer_tessellate.py
+++ b/bamboo/unit_tests/test_unit_layer_tessellate.py
@@ -65,7 +65,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_layer_uniform_hash.py b/bamboo/unit_tests/test_unit_layer_uniform_hash.py
index 30726aec8ee..1db9e4647a7 100644
--- a/bamboo/unit_tests/test_unit_layer_uniform_hash.py
+++ b/bamboo/unit_tests/test_unit_layer_uniform_hash.py
@@ -77,7 +77,7 @@ def construct_model(lbann):
     """
 
     # Input data
-    x_lbann = lbann.Identity(lbann.Input())
+    x_lbann = lbann.Input(data_field='samples')
 
     # Objects for LBANN model
     obj = []
diff --git a/bamboo/unit_tests/test_unit_layer_variance.py b/bamboo/unit_tests/test_unit_layer_variance.py
index d9a365ed8cc..500763a326b 100644
--- a/bamboo/unit_tests/test_unit_layer_variance.py
+++ b/bamboo/unit_tests/test_unit_layer_variance.py
@@ -63,7 +63,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/bamboo/unit_tests/test_unit_load_weights_lenet.py b/bamboo/unit_tests/test_unit_load_weights_lenet.py
index f7383fdb784..9043467f753 100644
--- a/bamboo/unit_tests/test_unit_load_weights_lenet.py
+++ b/bamboo/unit_tests/test_unit_load_weights_lenet.py
@@ -74,16 +74,15 @@ def construct_model(lbann):
     lbann.models.LeNet.global_count = 0
     lbann.Layer.global_count = 0
     # Layer graph
-    input_ = lbann.Input(target_mode='classification')
-    images = lbann.Identity(input_)
-    labels = lbann.Identity(input_)
+    images = lbann.Input(data_field='samples')
+    labels = lbann.Input(data_field='labels')
     x = lbann.models.LeNet(10)(images)
     probs = lbann.Softmax(x)
     loss = lbann.CrossEntropy(probs, labels)
     acc = lbann.CategoricalAccuracy(probs, labels)
 
     # Make sure all layers are on CPU
-    for layer in lbann.traverse_layer_graph(input_):
+    for layer in lbann.traverse_layer_graph([images, labels]):
         layer.device = 'cpu'
 
     # Objects for LBANN model
@@ -94,7 +93,7 @@ def construct_model(lbann):
 
     # Construct model
     return lbann.Model(num_epochs,
-                       layers=lbann.traverse_layer_graph(input_),
+                       layers=lbann.traverse_layer_graph([images, labels]),
                        objective_function=loss,
                        metrics=metrics,
                        callbacks=callbacks)
diff --git a/bamboo/unit_tests/test_unit_reconstruction_loss.py b/bamboo/unit_tests/test_unit_reconstruction_loss.py
index 5b9ffe008fb..d8a05d3edc8 100644
--- a/bamboo/unit_tests/test_unit_reconstruction_loss.py
+++ b/bamboo/unit_tests/test_unit_reconstruction_loss.py
@@ -15,8 +15,8 @@ def skeleton_jag_reconstruction_loss(cluster, dir_name,
         num_processes=32,
         disable_cuda=1,
         dir_name=dir_name,
-        sample_list_train_default='/p/lustre2/brainusr/datasets/10MJAG/1M_A/100K4trainers/100Kindex.txt',
-        sample_list_test_default='/p/lustre2/brainusr/datasets/10MJAG/1M_A/100K16trainers/t1_sample_list.txt',
+        sample_list_train_default='/p/vast1/lbann/datasets/JAG/10MJAG/1M_A/100K4trainers/100Kindex.txt',
+        sample_list_test_default='/p/vast1/lbann/datasets/JAG/10MJAG/1M_A/100K16trainers/t1_sample_list.txt',
         data_reader_name='jag',
         data_reader_percent='prototext',
         metadata='applications/physics/data/jag_100M_metadata.prototext',
diff --git a/bamboo/unit_tests/test_unit_subgraph_cross_grid_slice.py b/bamboo/unit_tests/test_unit_subgraph_cross_grid_slice.py
index 6a90255373c..c4c310ded2c 100644
--- a/bamboo/unit_tests/test_unit_subgraph_cross_grid_slice.py
+++ b/bamboo/unit_tests/test_unit_subgraph_cross_grid_slice.py
@@ -66,7 +66,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_subgraph_cross_grid_sum.py b/bamboo/unit_tests/test_unit_subgraph_cross_grid_sum.py
index 9cad2771004..099c082c007 100644
--- a/bamboo/unit_tests/test_unit_subgraph_cross_grid_sum.py
+++ b/bamboo/unit_tests/test_unit_subgraph_cross_grid_sum.py
@@ -66,7 +66,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_subgraph_slice_concat.py b/bamboo/unit_tests/test_unit_subgraph_slice_concat.py
index 5aae293d523..e43bef9ca71 100644
--- a/bamboo/unit_tests/test_unit_subgraph_slice_concat.py
+++ b/bamboo/unit_tests/test_unit_subgraph_slice_concat.py
@@ -66,7 +66,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_subgraph_slice_sum.py b/bamboo/unit_tests/test_unit_subgraph_slice_sum.py
index 462f3d83fc7..1a4f8d423f3 100644
--- a/bamboo/unit_tests/test_unit_subgraph_slice_sum.py
+++ b/bamboo/unit_tests/test_unit_subgraph_slice_sum.py
@@ -66,7 +66,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_dims)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_dims)))
diff --git a/bamboo/unit_tests/test_unit_subgraph_split_sum.py b/bamboo/unit_tests/test_unit_subgraph_split_sum.py
index 57f4e7c71b2..af4da2183ef 100644
--- a/bamboo/unit_tests/test_unit_subgraph_split_sum.py
+++ b/bamboo/unit_tests/test_unit_subgraph_split_sum.py
@@ -64,7 +64,7 @@ def construct_model(lbann):
     x_weights = lbann.Weights(optimizer=lbann.SGD(),
                               initializer=lbann.ConstantInitializer(value=0.0),
                               name='input_weights')
-    x = lbann.Sum(lbann.Reshape(lbann.Input(),
+    x = lbann.Sum(lbann.Reshape(lbann.Input(data_field='samples'),
                                 dims=tools.str_list(_sample_size)),
                   lbann.WeightsLayer(weights=x_weights,
                                      dims=tools.str_list(_sample_size)))
diff --git a/include/lbann/data_coordinator/buffered_data_coordinator.hpp b/include/lbann/data_coordinator/buffered_data_coordinator.hpp
index c87306a7554..8e867bd17fd 100644
--- a/include/lbann/data_coordinator/buffered_data_coordinator.hpp
+++ b/include/lbann/data_coordinator/buffered_data_coordinator.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -106,10 +106,10 @@ class buffered_data_coordinator : public data_coordinator {
   /** Archive for checkpoint and restart */
   template <class Archive> void serialize( Archive & ar );
 
-  void setup(
-    thread_pool& io_thread_pool,
-    int max_mini_batch_size,
-    std::map<execution_mode, generic_data_reader *> data_readers) override;
+  /** @brief After registering the active data field, allocate storage for each
+   *  data field in the context maps within the double buffer.
+   */
+  void register_active_data_field(data_field_type const data_field) override;
 
   void fp_setup_data(data_buffer<IODataType>& buffer, El::Int cur_mini_batch_size);
 
@@ -133,9 +133,9 @@ class buffered_data_coordinator : public data_coordinator {
   const data_buffer<IODataType>& get_data_buffer(const data_buffer_map_t& buffer_map, const execution_mode mode) const;
   data_buffer<IODataType>& get_data_buffer(data_buffer_map_t& buffer_map, const execution_mode mode);
 
-
   void distribute_from_local_matrix(execution_mode mode,
-                                    std::map<input_data_type, AbsDistMatrixType*>& input_buffers);
+                                    data_field_type data_field,
+                                    AbsDistMatrixType& input_buffer);
 
 protected:
   int fetch_to_local_matrix(data_buffer_map_t& buffer_map, const execution_mode mode);
@@ -167,6 +167,10 @@ class buffered_data_coordinator : public data_coordinator {
   bool load_from_checkpoint_distributed(persist& p) override;
 
 protected:
+  /** @brief After a data field has been registered with the data
+   *  coordinator, set up its buffers. Note this can be called after
+   *  each call to register_active_data_field. */
+  void setup_data_fields(int max_mini_batch_size);
 
   /**
    * Map from execution context to the index of the active data buffer
diff --git a/include/lbann/data_coordinator/data_coordinator.hpp b/include/lbann/data_coordinator/data_coordinator.hpp
index 404bc14e46b..d4a74c9847b 100644
--- a/include/lbann/data_coordinator/data_coordinator.hpp
+++ b/include/lbann/data_coordinator/data_coordinator.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -105,8 +105,15 @@ class data_coordinator {
   /** Archive for checkpoint and restart */
   template <class Archive> void serialize( Archive & ar );
 
+  /** Set up the thread pool and data readers within the data coordinator */
   virtual void setup(thread_pool& io_thread_pool, int max_mini_batch_size, std::map<execution_mode, generic_data_reader *> data_readers);
 
+  /** Once all of the models that are served by this data coordinator are
+   *  set up and have registered which data fields are required, set up the
+   *  local buffers in the data coordinator for each data field.
+   */
+  virtual void setup_data_fields(int max_mini_batch_size) = 0;
+
   void set_trainer(trainer &trainer) { m_trainer = &trainer; }
 
   /** Check to see if there is a valid training context for the data coordinator */
@@ -280,6 +287,31 @@ class data_coordinator {
     return flag;
   }
 
+  /**
+   * Get the linearized size of @c data_field. All data readers must agree on
+   * it (else LBANN_ERROR); returns -1 if no execution mode has a reader. */
+  long get_linearized_size(data_field_type const& data_field) const
+  {
+    long linearized_size = -1;
+    for (auto mode : execution_mode_iterator()) {
+      if (generic_data_reader const* const dr = get_data_reader(mode)) {
+        long tmp_size = dr->get_linearized_size(data_field);
+        if (linearized_size != -1 && linearized_size != tmp_size) {
+          LBANN_ERROR(
+            "data_coordinator: ",
+            to_string(mode),
+            " data set size (",
+            std::to_string(tmp_size),
+            ") does not match the currently established data set size (",
+            std::to_string(linearized_size),
+            ")");
+        }
+        linearized_size = tmp_size;
+      }
+    }
+    return linearized_size;
+  }
+
   /**
    * Get the linearized size of the underlying data.
    */
@@ -439,6 +471,11 @@ class data_coordinator {
     return at_new_epoch(execution_mode::training);
   }
 
+  virtual void register_active_data_field(data_field_type const data_field)
+  {
+    m_active_data_fields.insert(data_field); // std::set: duplicates ignored
+  }
+
   //************************************************************************
   //
   //************************************************************************
@@ -456,11 +493,14 @@ class data_coordinator {
   /** Pointer to LBANN communicator. */
   lbann_comm *m_comm;
 
+  /// Datasets hold the active statistics and metadata for each data reader
   dataset_map_t m_datasets;
 
   data_reader_map_t m_data_readers;
  //  std::map<execution_mode, dataset_stats> m_dataset_stats;
 
+  std::set<data_field_type> m_active_data_fields;
+
 public:  // @todo BVE FIXME
   bool m_data_set_processed;
   std::mutex dr_mutex;
diff --git a/include/lbann/data_coordinator/data_coordinator_metadata.hpp b/include/lbann/data_coordinator/data_coordinator_metadata.hpp
index 8206a4a11a5..e0469918113 100644
--- a/include/lbann/data_coordinator/data_coordinator_metadata.hpp
+++ b/include/lbann/data_coordinator/data_coordinator_metadata.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -38,6 +38,7 @@
 
 namespace lbann {
 
+// BVE FIXME
 // NA - Not applicable, used for input layers that don't produce a second output
 enum class data_reader_target_mode {CLASSIFICATION, REGRESSION, RECONSTRUCTION, LABEL_RECONSTRUCTION, INPUT, NA};
 std::string to_string(data_reader_target_mode m);
diff --git a/include/lbann/data_coordinator/io_data_buffer.hpp b/include/lbann/data_coordinator/io_data_buffer.hpp
index f25dd658bfd..1689d7fcd2a 100644
--- a/include/lbann/data_coordinator/io_data_buffer.hpp
+++ b/include/lbann/data_coordinator/io_data_buffer.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -46,7 +46,7 @@ class data_buffer {
   /** Number of samples in the current mini-batch */
   int m_num_samples_fetched;
   /** Distributed matrix used to stage local data to layer output */
-  std::map<input_data_type, std::unique_ptr<AbsDistMatrixType>> m_input_buffers;
+  std::map<data_field_type, std::unique_ptr<AbsDistMatrixType>> m_input_buffers;
   std::atomic<bool> m_fetch_data_in_background;
   std::future<void> m_data_fetch_future;
   /// 1-D Matrix of which indices were fetched in this mini-batch
@@ -56,16 +56,6 @@ class data_buffer {
     m_num_samples_fetched(0), m_fetch_data_in_background(false)
   {
     m_input_buffers.clear();
-    // Create an empty buffer for each type of input data
-    // @todo BVE this should be tailored to only create buffers needed
-    //    by the data reader
-    for(auto idt : input_data_type_iterator()) {
-      m_input_buffers[idt].reset(new StarVCMatDT<TensorDataType, El::Device::CPU>(comm->get_trainer_grid()));
-#if defined(LBANN_HAS_GPU)
-      // Pin the memory so that we get efficient GPU data transfer
-      m_input_buffers[idt]->Matrix().SetMemoryMode(1);
-#endif // LBANN_HAS_GPU
-    }
   }
 
   data_buffer(const data_buffer& other) :
@@ -93,6 +83,11 @@ class data_buffer {
   /** Archive for checkpoint and restart */
   template <class Archive> void serialize( Archive & ar );
 
+  /** @brief Create a data parallel distributed matrix to hold the input data
+   * for the field */
+  void initialize_buffer_for_data_field(data_field_type const data_field,
+                                        lbann_comm* comm);
+
   void set_fetch_data_in_background(bool flag) { m_fetch_data_in_background = flag; }
 
   bool is_data_fetched_in_background() const { return m_fetch_data_in_background; }
diff --git a/include/lbann/data_coordinator/io_data_buffer_impl.hpp b/include/lbann/data_coordinator/io_data_buffer_impl.hpp
index a7f24b8e113..93b1ab52bc4 100644
--- a/include/lbann/data_coordinator/io_data_buffer_impl.hpp
+++ b/include/lbann/data_coordinator/io_data_buffer_impl.hpp
@@ -40,6 +40,23 @@ void data_buffer<TensorDataType>::serialize( Archive & ar ) {
      CEREAL_NVP(m_indices_fetched_per_mb)*/);
 }
 
+template <typename TensorDataType>
+void data_buffer<TensorDataType>::initialize_buffer_for_data_field(
+  data_field_type const data_field,
+  lbann_comm* comm)
+{
+  // Allocate only if this field has no buffer yet; repeat calls are no-ops
+  if (m_input_buffers.find(data_field) == m_input_buffers.end()) {
+    m_input_buffers[data_field] =
+      make_unique<StarVCMatDT<TensorDataType, El::Device::CPU>>(
+        comm->get_trainer_grid());
+#if defined(LBANN_HAS_GPU)
+    // Pin the memory so that we get efficient GPU data transfer
+    m_input_buffers[data_field]->Matrix().SetMemoryMode(1);
+#endif // LBANN_HAS_GPU
+  }
+}
+
 } // namespace lbann
 
 
diff --git a/include/lbann/data_readers/data_reader.hpp b/include/lbann/data_readers/data_reader.hpp
index f65b18b4c9d..46d94643454 100644
--- a/include/lbann/data_readers/data_reader.hpp
+++ b/include/lbann/data_readers/data_reader.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -30,17 +30,17 @@
 #define LBANN_DATA_READER_HPP
 
 #include "lbann/base.hpp"
+#include "lbann/comm.hpp"
 #include "lbann/data_coordinator/data_coordinator_metadata.hpp"
-#include "lbann/utils/random_number_generators.hpp"
 #include "lbann/data_readers/utils/input_data_type.hpp"
-#include "lbann/utils/exception.hpp"
-#include "lbann/comm.hpp"
 #include "lbann/io/file_io.hpp"
 #include "lbann/io/persist.hpp"
-#include "lbann/utils/options.hpp"
 #include "lbann/transforms/transform_pipeline.hpp"
-#include "lbann/utils/distconv.hpp"
 #include "lbann/utils/argument_parser.hpp"
+#include "lbann/utils/distconv.hpp"
+#include "lbann/utils/exception.hpp"
+#include "lbann/utils/options.hpp"
+#include "lbann/utils/random_number_generators.hpp"
 
 #include <cassert>
 #include <algorithm>
@@ -76,40 +76,48 @@ class generic_data_reader {
   /**
    * ctor
    */
-  generic_data_reader(bool shuffle = true) :
-    m_verbose(global_argument_parser().get<bool>(VERBOSE)),
-    m_data_store(nullptr),
-    m_comm(nullptr),
-    m_mini_batch_size(0), m_current_pos(0),
-    m_stride_to_next_mini_batch(0), m_base_offset(0), m_model_offset(0),
-    m_sample_stride(1), m_iteration_stride(1),
-    m_last_mini_batch_size(0),
-    m_stride_to_last_mini_batch(0),
-    m_reset_mini_batch_index(0),
-    m_loaded_mini_batch_idx(0),
-    m_current_mini_batch_idx(0),
-    m_num_iterations_per_epoch(0), m_global_mini_batch_size(0),
-    m_global_last_mini_batch_size(0),
-    m_world_master_mini_batch_adjustment(0),
-    m_num_parallel_readers(0), m_rank_in_model(0),
-    m_max_files_to_load(0),
-    m_file_dir(""), m_data_sample_list(""), m_data_fn(""), m_label_fn(""),
-    m_shuffle(shuffle), m_absolute_sample_count(0),
-    m_use_percent(1.0),
-    m_master(false),
-    m_gan_labelling(false), //default, not GAN
-    m_gan_label_value(0),  //If GAN, default for fake label, discriminator model
-    m_io_thread_pool(nullptr),
-    m_jag_partitioned(false),
-    m_keep_sample_order(false),
-    m_trainer(nullptr),
-    m_issue_warning(true)
+  generic_data_reader(bool shuffle = true)
+    : m_verbose(global_argument_parser().get<bool>(VERBOSE)),
+      m_data_store(nullptr),
+      m_comm(nullptr),
+      m_mini_batch_size(0),
+      m_current_pos(0),
+      m_stride_to_next_mini_batch(0),
+      m_base_offset(0),
+      m_model_offset(0),
+      m_sample_stride(1),
+      m_iteration_stride(1),
+      m_last_mini_batch_size(0),
+      m_stride_to_last_mini_batch(0),
+      m_reset_mini_batch_index(0),
+      m_loaded_mini_batch_idx(0),
+      m_current_mini_batch_idx(0),
+      m_num_iterations_per_epoch(0),
+      m_global_mini_batch_size(0),
+      m_global_last_mini_batch_size(0),
+      m_world_master_mini_batch_adjustment(0),
+      m_num_parallel_readers(0),
+      m_rank_in_model(0),
+      m_max_files_to_load(0),
+      m_file_dir(""),
+      m_data_sample_list(""),
+      m_data_fn(""),
+      m_label_fn(""),
+      m_shuffle(shuffle),
+      m_absolute_sample_count(0),
+      m_use_percent(1.0),
+      m_master(false),
+      m_gan_labelling(false), // default, not GAN
+      m_gan_label_value(
+        0), // If GAN, default for fake label, discriminator model
+      m_io_thread_pool(nullptr),
+      m_jag_partitioned(false),
+      m_keep_sample_order(false),
+      m_trainer(nullptr),
+      m_issue_warning(true)
   {
     // By default only support fetching input samples
-    for(auto i : input_data_type_iterator()) {
-      m_supported_input_types[i] = false;
-    }
-    m_supported_input_types[input_data_type::SAMPLES] = true;
+    m_supported_input_types[INPUT_DATA_TYPE_SAMPLES] = true;
   }
   generic_data_reader(const generic_data_reader&) = default;
   generic_data_reader& operator=(const generic_data_reader&) = default;
@@ -297,21 +305,41 @@ class generic_data_reader {
   virtual std::string get_type() const = 0;
 
   /** @brief Fetch a mini-batch worth of data, including samples, labels, responses (as appropriate) */
-  int fetch(std::map<input_data_type, CPUMat*>& input_buffers, El::Matrix<El::Int>& indices_fetched);
-  /// Fetch this mini-batch's samples into X.
-  virtual int fetch_data(CPUMat& X, El::Matrix<El::Int>& indices_fetched);
-  /// Fetch this mini-batch's labels into Y.
-  virtual int fetch_labels(CPUMat& Y);
-  /// Fetch this mini-batch's responses into Y.
-  virtual int fetch_responses(CPUMat& Y);
+  int fetch(std::map<data_field_type, CPUMat*>& input_buffers,
+            El::Matrix<El::Int>& indices_fetched);
 
-  virtual bool has_labels() const { return m_supported_input_types.at(input_data_type::LABELS); }
-  virtual bool has_responses() const { return m_supported_input_types.at(input_data_type::RESPONSES); }
+  /** @brief Check whether the data reader supports this specific data field;
+   *  a field that was never registered is reported as unsupported. */
+  virtual bool has_data_field(data_field_type data_field) const
+  {
+    if (m_supported_input_types.find(data_field) !=
+        m_supported_input_types.end()) {
+      return m_supported_input_types.at(data_field);
+    }
+    else {
+      return false;
+    }
+  }
+
+  virtual bool has_labels() const
+  {
+    return has_data_field(INPUT_DATA_TYPE_LABELS);
+  }
+  virtual bool has_responses() const
+  {
+    return has_data_field(INPUT_DATA_TYPE_RESPONSES);
+  }
 
   /// Whether or not a data reader has labels
-  virtual void set_has_labels(const bool b) { m_supported_input_types[input_data_type::LABELS] = b; }
+  virtual void set_has_labels(const bool b)
+  {
+    m_supported_input_types[INPUT_DATA_TYPE_LABELS] = b;
+  }
   /// Whether or not a data reader has a response field
-  virtual void set_has_responses(const bool b) { m_supported_input_types[input_data_type::RESPONSES] = b; }
+  virtual void set_has_responses(const bool b)
+  {
+    m_supported_input_types[INPUT_DATA_TYPE_RESPONSES] = b;
+  }
 
   /**
    * During the network's update phase, the data reader will
@@ -350,14 +378,20 @@ class generic_data_reader {
     return 1;
   }
   /// get the linearized size of what is identified by desc.
-  virtual int get_linearized_size(const std::string& desc) const {
-    if (desc == "data") {
+  virtual int get_linearized_size(data_field_type const& data_field) const
+  {
+    if (data_field == INPUT_DATA_TYPE_SAMPLES) {
       return get_linearized_data_size();
-    } else if (desc == "label") {
+    }
+    else if (data_field == INPUT_DATA_TYPE_LABELS) {
       return get_linearized_label_size();
-    } else if (desc == "response") {
+    }
+    else if (data_field == INPUT_DATA_TYPE_RESPONSES) {
       return get_linearized_response_size();
     }
+    else {
+      LBANN_ERROR("Unknown data_field_type value provided: " + data_field);
+    }
     return 0;
   }
   /// Get the dimensions of the data.
@@ -724,7 +758,12 @@ class generic_data_reader {
 
   lbann_comm *m_comm;
 
-  virtual bool fetch_data_block(CPUMat& X, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix<El::Int>& indices_fetched);
+  virtual bool
+  fetch_data_block(std::map<data_field_type, CPUMat*>& input_buffers,
+                   El::Int block_offset,
+                   El::Int block_stride,
+                   El::Int mb_size,
+                   El::Matrix<El::Int>& indices_fetched);
 
   /**
    * Fetch a single sample into a matrix.
@@ -857,7 +896,7 @@ class generic_data_reader {
 
   /** @brief Holds a true value for each input data type that is supported.
    *  Use an ordered map so that checkpoints are stable. */
-  std::map<input_data_type, bool> m_supported_input_types;
+  std::map<data_field_type, bool> m_supported_input_types;
 
   //var to support GAN
   bool m_gan_labelling; //boolean flag of whether its GAN binary label, default is false
diff --git a/include/lbann/data_readers/data_reader_HDF5.hpp b/include/lbann/data_readers/data_reader_HDF5.hpp
index 8080cfed5d1..d1ec450e832 100644
--- a/include/lbann/data_readers/data_reader_HDF5.hpp
+++ b/include/lbann/data_readers/data_reader_HDF5.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -27,8 +27,8 @@
 #define LBANN_DATA_READER_HDF5_REVISED_HPP
 
 #include "lbann/data_readers/data_reader.hpp"
-#include "lbann/data_readers/sample_list_hdf5.hpp"
 #include "lbann/data_readers/data_reader_sample_list.hpp"
+#include "lbann/data_readers/sample_list_hdf5.hpp"
 #include "lbann/data_store/data_store_conduit.hpp"
 
 // Forward declaration
@@ -71,7 +71,7 @@ class hdf5_data_reader : public data_reader_sample_list<sample_list_hdf5<std::st
    * in lbann (datum, label, response); in general, it can be
    * any pack field in the experiment schema: pack: <string>
    */
-  bool fetch(std::string which, CPUMat& Y, int data_id, int mb_idx);
+  bool fetch(data_field_type data_field, CPUMat& Y, int data_id, int mb_idx);
 
   bool fetch_datum(CPUMat& X, int data_id, int mb_idx) override
   {
@@ -249,7 +249,7 @@ class hdf5_data_reader : public data_reader_sample_list<sample_list_hdf5<std::st
    *  which is one of: float32, float64, int32, int64, uint64, uint32
    */
   const void* get_data(const size_t sample_id_in,
-                       std::string field_name_in,
+                       data_field_type data_field,
                        size_t& num_elts_out,
                        std::string& dtype_out) const;
 
diff --git a/include/lbann/data_readers/data_reader_csv.hpp b/include/lbann/data_readers/data_reader_csv.hpp
index 2d1e3ab3627..1d244971d41 100644
--- a/include/lbann/data_readers/data_reader_csv.hpp
+++ b/include/lbann/data_readers/data_reader_csv.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -63,12 +63,12 @@ class csv_reader : public generic_data_reader {
   void set_response_col(int col) { m_response_col = col; }
   /// Disable fetching labels.
   void disable_labels(bool b = true) {
-    m_supported_input_types[input_data_type::LABELS] = false;
+    m_supported_input_types[INPUT_DATA_TYPE_LABELS] = false;
     m_disable_labels = b;
   }
   /// Enable fetching responses (disabled by default).
   void enable_responses(bool b = false) {
-    m_supported_input_types[input_data_type::RESPONSES] = true;
+    m_supported_input_types[INPUT_DATA_TYPE_RESPONSES] = true;
     m_disable_responses = b;
   }
   /// Set the column separator (default is ',').
diff --git a/include/lbann/data_readers/data_reader_jag_conduit.hpp b/include/lbann/data_readers/data_reader_jag_conduit.hpp
index 508df9022df..a0bd786b23d 100644
--- a/include/lbann/data_readers/data_reader_jag_conduit.hpp
+++ b/include/lbann/data_readers/data_reader_jag_conduit.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -172,12 +172,6 @@ class data_reader_jag_conduit : public generic_data_reader {
   bool has_list_per_trainer() const override { return m_list_per_trainer; }
 
 
-  /// Fetch data of a mini-batch or reuse it from the cache of the leading reader
-  int fetch_data(CPUMat& X, El::Matrix<El::Int>& indices_fetched) override;
-  /// Fetch responses of a mini-batch or reuse it from the cache of the leading reader
-  int fetch_responses(CPUMat& Y) override;
-  /// Fetch labels of a mini-batch or reuse it from the cache of the leading reader
-  int fetch_labels(CPUMat& Y) override;
 
   /// Return the number of measurement views
   unsigned int get_num_img_srcs() const;
@@ -282,11 +276,6 @@ class data_reader_jag_conduit : public generic_data_reader {
     create_datum_views(CPUMat& X, const std::vector<size_t>& sizes, const int mb_idx) const;
 
   /// Export cached data minibatch
-  int reuse_data(CPUMat& X);
-  /// Export cached responses minibatch
-  int reuse_responses(CPUMat& Y);
-  /// Export cached labels minibatch
-  int reuse_labels(CPUMat& Y);
 
   bool fetch(CPUMat& X, int data_id, conduit::Node& sample, int mb_idx, int tid,
              const variable_t vt, const std::string tag);
diff --git a/include/lbann/data_readers/data_reader_numpy.hpp b/include/lbann/data_readers/data_reader_numpy.hpp
index 6c1372af20a..5fab17e4661 100644
--- a/include/lbann/data_readers/data_reader_numpy.hpp
+++ b/include/lbann/data_readers/data_reader_numpy.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,7 +23,8 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 //
-// lbann_data_reader_numpy .hpp .cpp - generic_data_reader class for numpy dataset
+// lbann_data_reader_numpy .hpp .cpp - generic_data_reader class for numpy
+// dataset
 ////////////////////////////////////////////////////////////////////////////////
 
 #ifndef LBANN_DATA_READER_NUMPY_HPP
@@ -64,7 +65,8 @@ class numpy_reader : public generic_data_reader {
   const std::vector<int> get_data_dims() const override {
     std::vector<int> dims(m_data.shape.begin() + 1,
                           m_data.shape.end());
-    if (m_supported_input_types.at(input_data_type::LABELS) || m_supported_input_types.at(input_data_type::RESPONSES)) {
+    if (m_supported_input_types.at(INPUT_DATA_TYPE_LABELS) ||
+        m_supported_input_types.at(INPUT_DATA_TYPE_RESPONSES)) {
       dims.back() -= 1;
     }
     return dims;
diff --git a/include/lbann/data_readers/data_reader_python.hpp b/include/lbann/data_readers/data_reader_python.hpp
index 0ea6d28cb84..2d8fe03825a 100644
--- a/include/lbann/data_readers/data_reader_python.hpp
+++ b/include/lbann/data_readers/data_reader_python.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -59,7 +59,7 @@ class python_reader : public generic_data_reader {
   void load() override;
 
 protected:
-  bool fetch_data_block(CPUMat& X,
+  bool fetch_data_block(std::map<data_field_type, CPUMat*>& input_buffers,
                         El::Int block_offset,
                         El::Int block_stride,
                         El::Int mb_size,
diff --git a/include/lbann/data_readers/data_reader_sample_list_impl.hpp b/include/lbann/data_readers/data_reader_sample_list_impl.hpp
index 3b3f05e6f00..fa8c0c33029 100644
--- a/include/lbann/data_readers/data_reader_sample_list_impl.hpp
+++ b/include/lbann/data_readers/data_reader_sample_list_impl.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/include/lbann/data_readers/data_reader_synthetic.hpp b/include/lbann/data_readers/data_reader_synthetic.hpp
index 83faee1e793..caa54fa7cf8 100644
--- a/include/lbann/data_readers/data_reader_synthetic.hpp
+++ b/include/lbann/data_readers/data_reader_synthetic.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -30,6 +30,9 @@
 
 #include "data_reader.hpp"
 
+// Forward declaration
+class DataReaderSyntheticWhiteboxTester;
+
 namespace lbann {
 
 /**
@@ -83,7 +86,10 @@ class data_reader_synthetic : public generic_data_reader {
   bool fetch_label(CPUMat& Y, int data_id, int mb_idx) override;
   bool fetch_response(CPUMat& Y, int data_id, int mb_idx) override;
 
- private:
+  // Designate a whitebox testing friend
+  friend class ::DataReaderSyntheticWhiteboxTester;
+
+private:
   /** Number of samples in the dataset. */
   int m_num_samples;
   /** Number of labels in the dataset. */
diff --git a/include/lbann/data_readers/sample_list_impl.hpp b/include/lbann/data_readers/sample_list_impl.hpp
index 60a9aea1f7f..8fa664f5b62 100644
--- a/include/lbann/data_readers/sample_list_impl.hpp
+++ b/include/lbann/data_readers/sample_list_impl.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/include/lbann/data_readers/sample_list_open_files_impl.hpp b/include/lbann/data_readers/sample_list_open_files_impl.hpp
index dfe1319c871..0e03cecb6fe 100644
--- a/include/lbann/data_readers/sample_list_open_files_impl.hpp
+++ b/include/lbann/data_readers/sample_list_open_files_impl.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -805,10 +805,10 @@ ::manage_open_file_handles(sample_file_id_t id) {
       clear_file_handle(victim_fd);
     }
 
-    /// Before we can enqueue the any new access times for this descriptor, remove any
-    /// earlier descriptor
+    /// Before we can enqueue any new access times for this descriptor,
+    /// remove any earlier descriptor
     std::sort_heap(m_open_fd_pq.begin(), m_open_fd_pq.end(), pq_cmp);
-    if(m_open_fd_pq.front().first == id) {
+    if (m_open_fd_pq.front().first == id) {
       m_open_fd_pq.pop_front();
     }
     std::make_heap(m_open_fd_pq.begin(), m_open_fd_pq.end(), pq_cmp);
diff --git a/include/lbann/data_readers/utils/input_data_type.hpp b/include/lbann/data_readers/utils/input_data_type.hpp
index 5c536be17ad..6473ef6e1de 100644
--- a/include/lbann/data_readers/utils/input_data_type.hpp
+++ b/include/lbann/data_readers/utils/input_data_type.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -27,15 +27,14 @@
 #ifndef LBANN_INPUT_DATA_TYPE_HPP_INCLUDED
 #define LBANN_INPUT_DATA_TYPE_HPP_INCLUDED
 
-#include "lbann/utils/enum_iterator.hpp"
 #include <string>
 
 namespace lbann {
 
-enum class input_data_type {SAMPLES, LABELS, RESPONSES};
-using input_data_type_iterator = enum_iterator<input_data_type, input_data_type::SAMPLES, input_data_type::RESPONSES>;
-std::string to_string(input_data_type const& idt);
-
+using data_field_type = std::string;
+#define INPUT_DATA_TYPE_SAMPLES "samples"
+#define INPUT_DATA_TYPE_LABELS "labels"
+#define INPUT_DATA_TYPE_RESPONSES "responses"
 }
 
 #endif // LBANN_INPUT_DATA_TYPE_HPP_INCLUDED
diff --git a/include/lbann/execution_algorithms/batch_functional_inference_algorithm.hpp b/include/lbann/execution_algorithms/batch_functional_inference_algorithm.hpp
index fe9a23c3a00..205dd69361d 100644
--- a/include/lbann/execution_algorithms/batch_functional_inference_algorithm.hpp
+++ b/include/lbann/execution_algorithms/batch_functional_inference_algorithm.hpp
@@ -37,7 +37,7 @@
 
 namespace lbann {
 
-/** @brief Class for LBANN batch inference algorithms. 
+/** @brief Class for LBANN batch inference algorithms.
  *
  *  This execution algorithm is meant for running inference using a trained
  *  model and samples passed by the user from an external application.  The
@@ -136,7 +136,7 @@ class batch_functional_inference_algorithm {
    */
   void get_labels(model& model,\
                   El::Matrix<int, El::Device::CPU> &labels) {
-    int pred_label;
+    int pred_label = 0;
     float max, col_value;
 
     for (const auto* l : model.get_layers()) {
diff --git a/include/lbann/layers/io/input_layer.hpp b/include/lbann/layers/io/input_layer.hpp
index 8f21dff9dbd..0571769323e 100644
--- a/include/lbann/layers/io/input_layer.hpp
+++ b/include/lbann/layers/io/input_layer.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -27,11 +27,12 @@
 #ifndef LBANN_LAYERS_INPUT_LAYER_HPP_INCLUDED
 #define LBANN_LAYERS_INPUT_LAYER_HPP_INCLUDED
 
-#include "lbann/layers/data_type_layer.hpp"
 #include "lbann/data_coordinator/buffered_data_coordinator.hpp"
-#include "lbann/utils/exception.hpp"
-#include "lbann/utils/distconv.hpp"
+#include "lbann/data_readers/utils/input_data_type.hpp"
+#include "lbann/layers/data_type_layer.hpp"
 #include "lbann/models/model.hpp"
+#include "lbann/utils/distconv.hpp"
+#include "lbann/utils/exception.hpp"
 
 namespace lbann {
 
@@ -105,21 +106,14 @@ class input_layer : public data_type_layer<TensorDataType> {
  public:
 
   /// @todo make the map and vector references
-  input_layer(lbann_comm *comm,
-              data_reader_target_mode dr_mode = data_reader_target_mode::NA)
-    : data_type_layer<TensorDataType>(comm),
-    m_data_reader_mode(dr_mode) {
-
-    // Input layers have no parents
-    this->m_expected_num_parent_layers = 0;
-    if(dr_mode == data_reader_target_mode::NA) {
-      this->m_expected_num_child_layers = 1;
-    }else {
-      // Input layers output a sample and target, which could be the
-      // original value, categorical label, or regression value
-      this->m_expected_num_child_layers = 2;
-    }
-  }
+   input_layer(lbann_comm* comm, std::string const data_field = "")
+     : data_type_layer<TensorDataType>(comm), m_data_field(data_field)
+   {
+     // comm goes to the data_type_layer base; data_field is kept in
+     // m_data_field. Input layers have no parents and expect one child.
+     this->m_expected_num_parent_layers = 0;
+     this->m_expected_num_child_layers = 1;
+   }
 
   input_layer(const input_layer&) = default;
   input_layer& operator=(const input_layer&) = default;
@@ -156,10 +150,6 @@ class input_layer : public data_type_layer<TensorDataType> {
    */
   std::vector<int> get_data_dims(DataReaderMetaData& dr_metadata, int child_index = 0) const;
 
-  bool is_for_regression() const {
-    return (m_data_reader_mode == data_reader_target_mode::REGRESSION);
-  }
-
   /** @name Serialization */
   ///@{
 
@@ -167,19 +157,17 @@ class input_layer : public data_type_layer<TensorDataType> {
   void serialize(ArchiveT& ar);
 
   ///@}
- protected:
-  data_reader_target_mode m_data_reader_mode;
 
  private:
   friend cereal::access;
-  input_layer()
-    : input_layer(nullptr, data_reader_target_mode::NA)
-  {}
+  input_layer() : input_layer(nullptr) {}
 
   // This is to track if samples are loaded with set_samples(), if so the
   // fp_compute() sample loading is no longer necessary
   bool m_samples_loaded = false;
 
+  data_field_type m_data_field;
+
 #ifdef LBANN_HAS_DISTCONV
  public:
   /** @brief Extensions for distributed convolutions */
diff --git a/include/lbann/proto/proto_common.hpp b/include/lbann/proto/proto_common.hpp
index 04a4491aa58..0f0e29afe2d 100644
--- a/include/lbann/proto/proto_common.hpp
+++ b/include/lbann/proto/proto_common.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/include/lbann/utils/argument_parser.hpp b/include/lbann/utils/argument_parser.hpp
index 600e083aafd..256292917e9 100644
--- a/include/lbann/utils/argument_parser.hpp
+++ b/include/lbann/utils/argument_parser.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -770,8 +770,7 @@ auto argument_parser<ErrorHandler>::add_flag_impl_(
 
 }// namespace utils
 
-using default_arg_parser_type =
-         utils::argument_parser<utils::strict_parsing>;
+using default_arg_parser_type = utils::argument_parser<utils::strict_parsing>;
 
 default_arg_parser_type& global_argument_parser();
 
diff --git a/include/lbann/utils/dnn_lib/cudnn/convolution.hpp b/include/lbann/utils/dnn_lib/cudnn/convolution.hpp
index 420c3ac8da8..bda0faee15d 100644
--- a/include/lbann/utils/dnn_lib/cudnn/convolution.hpp
+++ b/include/lbann/utils/dnn_lib/cudnn/convolution.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/include/lbann/utils/dnn_lib/miopen/convolution.hpp b/include/lbann/utils/dnn_lib/miopen/convolution.hpp
index c6f32b224c6..ccffb5868b1 100644
--- a/include/lbann/utils/dnn_lib/miopen/convolution.hpp
+++ b/include/lbann/utils/dnn_lib/miopen/convolution.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -89,11 +89,11 @@ get_bwd_weights_conv_workspace_size(TensorDescriptor const& dyDesc,
   auto handle_manager = internal::make_default_handle_manager(si);
   CHECK_MIOPEN(
     miopenConvolutionBackwardWeightsGetWorkSpaceSize(handle_manager.get(),
-                                                    dyDesc,
-                                                    xDesc,
-                                                    convDesc,
-                                                    dwDesc,
-                                                    &size));
+                                                     dyDesc,
+                                                     xDesc,
+                                                     convDesc,
+                                                     dwDesc,
+                                                     &size));
   return size;
 }
 
diff --git a/include/lbann/utils/lbann_library.hpp b/include/lbann/utils/lbann_library.hpp
index 5b444becb26..b90f45509ac 100644
--- a/include/lbann/utils/lbann_library.hpp
+++ b/include/lbann/utils/lbann_library.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -68,11 +68,12 @@ int allocate_trainer_resources(lbann_comm *comm);
 
 // The constructed trainer has global scope. This returns a reference
 // to this global trainer.
-trainer& construct_trainer(lbann_comm *comm,
+trainer& construct_trainer(lbann_comm* comm,
                            lbann_data::Trainer* pb_trainer,
-                           lbann_data::LbannPB &pb);
+                           lbann_data::LbannPB& pb);
 
-std::unique_ptr<thread_pool> construct_io_thread_pool(lbann_comm *comm, bool serialized_io);
+std::unique_ptr<thread_pool> construct_io_thread_pool(lbann_comm* comm,
+                                                      bool serialized_io);
 
 std::unique_ptr<model> build_model_from_prototext(
     int argc, char **argv,
diff --git a/include/lbann/utils/protobuf_utils.hpp b/include/lbann/utils/protobuf_utils.hpp
index 8f1083bddfb..64dc1f3aa84 100644
--- a/include/lbann/utils/protobuf_utils.hpp
+++ b/include/lbann/utils/protobuf_utils.hpp
@@ -50,8 +50,8 @@ load_prototext(
  *  specification.
  */
 std::vector<prototext_fn_triple>
-parse_prototext_filenames_from_command_line(
-  const bool master, const int trainer_rank=0);
+parse_prototext_filenames_from_command_line(const bool master,
+                                            const int trainer_rank = 0);
 
 std::vector<std::unique_ptr<lbann_data::LbannPB>>
 read_in_prototext_files(
diff --git a/include/lbann/utils/threads/thread_pool.hpp b/include/lbann/utils/threads/thread_pool.hpp
index 80d1380123a..b7d56387eb7 100644
--- a/include/lbann/utils/threads/thread_pool.hpp
+++ b/include/lbann/utils/threads/thread_pool.hpp
@@ -55,10 +55,7 @@ class thread_pool {
   thread_pool(size_type max_threads);
 
   /** @brief Destroy the threadpool */
-  ~thread_pool() {
-    all_work_done_ = true;
-    global_work_queue_.wake_all(true);
-  }
+  ~thread_pool() { reap_threads(); }
 
   /** @brief Launch the threads */
   void launch_threads(size_type num_threads);
diff --git a/model_zoo/data_readers/data_reader_jag.prototext b/model_zoo/data_readers/data_reader_jag.prototext
index e6218fa3686..704a1f090b4 100644
--- a/model_zoo/data_readers/data_reader_jag.prototext
+++ b/model_zoo/data_readers/data_reader_jag.prototext
@@ -14,7 +14,7 @@ data_reader {
     name: "jag_conduit"
     role: "train"
     shuffle: true
-    sample_list: "/p/lustre2/brainusr/datasets/10MJAG/1M_A/100K4trainers/100Kindex.txt"
+    sample_list: "/p/vast1/lbann/datasets/JAG/10MJAG/1M_A/100K4trainers/100Kindex.txt"
     sample_list_per_trainer: false
     sample_list_per_model: false
 
@@ -33,7 +33,7 @@ data_reader {
     role: "test"
     shuffle: false
     # change to a lustre path
-    sample_list: "/p/lustre2/brainusr/datasets/10MJAG/1M_A/100K16trainers/t1_sample_list.txt"
+    sample_list: "/p/vast1/lbann/datasets/JAG/10MJAG/1M_A/100K16trainers/t1_sample_list.txt"
     sample_list_per_trainer: false
     sample_list_per_model: false
 
diff --git a/model_zoo/jag_utils/build_index.cpp b/model_zoo/jag_utils/build_index.cpp
index 13eb7b1414c..26490bd277b 100644
--- a/model_zoo/jag_utils/build_index.cpp
+++ b/model_zoo/jag_utils/build_index.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -58,18 +58,18 @@ int main(int argc, char *argv[]) {
     auto& arg_parser = global_argument_parser();
     construct_std_options();
     construct_jag_options();
-		try {
-			arg_parser.parse(argc, argv);
-		}
-		catch (std::exception const& e) {
-			auto guessed_rank = guess_global_rank();
-			if (guessed_rank <= 0)
-				// Cannot call `El::ReportException` because MPI hasn't been
-				// initialized yet.
-				std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
-									<< e.what() << "\n\nProcess terminating." << std::endl;
-			std::terminate();
-		}
+    try {
+      arg_parser.parse(argc, argv);
+    }
+    catch (std::exception const& e) {
+      auto guessed_rank = guess_global_rank();
+      if (guessed_rank <= 0)
+        // Cannot call `El::ReportException` because MPI hasn't been
+        // initialized yet.
+        std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
+                  << e.what() << "\n\nProcess terminating." << std::endl;
+      std::terminate();
+    }
 
     if (argc == 1) {
       if (master) {
diff --git a/model_zoo/jag_utils/build_sample_id_mapping.cpp b/model_zoo/jag_utils/build_sample_id_mapping.cpp
index 7472201afc2..594dc425a3b 100644
--- a/model_zoo/jag_utils/build_sample_id_mapping.cpp
+++ b/model_zoo/jag_utils/build_sample_id_mapping.cpp
@@ -28,7 +28,7 @@ int main(int argc, char **argv) {
   auto& arg_parser = global_argument_parser();
   construct_std_options();
   construct_jag_options();
-	try {
+  try {
     arg_parser.parse(argc, argv);
   }
   catch (std::exception const& e) {
diff --git a/model_zoo/jag_utils/check_for_duplicate_samples.cpp b/model_zoo/jag_utils/check_for_duplicate_samples.cpp
index 1ba22ec6ca2..c6e08e63794 100644
--- a/model_zoo/jag_utils/check_for_duplicate_samples.cpp
+++ b/model_zoo/jag_utils/check_for_duplicate_samples.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -59,17 +59,17 @@ int main(int argc, char *argv[]) {
     construct_std_options();
     construct_jag_options();
     try {
-    arg_parser.parse(argc, argv);
-		}
-		catch (std::exception const& e) {
-			auto guessed_rank = guess_global_rank();
-			if (guessed_rank <= 0)
-				// Cannot call `El::ReportException` because MPI hasn't been
-				// initialized yet.
-				std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
-									<< e.what() << "\n\nProcess terminating." << std::endl;
-			std::terminate();
-		}
+      arg_parser.parse(argc, argv);
+    }
+    catch (std::exception const& e) {
+      auto guessed_rank = guess_global_rank();
+      if (guessed_rank <= 0)
+        // Cannot call `El::ReportException` because MPI hasn't been
+        // initialized yet.
+        std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
+                  << e.what() << "\n\nProcess terminating." << std::endl;
+      std::terminate();
+    }
 
     // sanity check invocation
     if (arg_parser.get<std::string>(FILELIST) == "") {
diff --git a/model_zoo/jag_utils/check_images.cpp b/model_zoo/jag_utils/check_images.cpp
index 2b33a6f4c0e..cf523e09674 100644
--- a/model_zoo/jag_utils/check_images.cpp
+++ b/model_zoo/jag_utils/check_images.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -55,18 +55,18 @@ int main(int argc, char *argv[]) {
     auto& arg_parser = global_argument_parser();
     construct_std_options();
     construct_jag_options();
-		try {
-				arg_parser.parse(argc, argv);
-			}
-			catch (std::exception const& e) {
-				auto guessed_rank = guess_global_rank();
-				if (guessed_rank <= 0)
-					// Cannot call `El::ReportException` because MPI hasn't been
-					// initialized yet.
-					std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
-										<< e.what() << "\n\nProcess terminating." << std::endl;
-				std::terminate();
-			}
+    try {
+      arg_parser.parse(argc, argv);
+    }
+    catch (std::exception const& e) {
+      auto guessed_rank = guess_global_rank();
+      if (guessed_rank <= 0)
+        // Cannot call `El::ReportException` because MPI hasn't been
+        // initialized yet.
+        std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
+                  << e.what() << "\n\nProcess terminating." << std::endl;
+      std::terminate();
+    }
 
     if (arg_parser.get<std::string>(FILELIST)) {
       if (master) {
@@ -77,7 +77,10 @@ int main(int argc, char *argv[]) {
     std::vector<std::string> files;
     std::ifstream in(arg_parser.get<std::string>(FILELIST).c_str());
     if (!in) {
-        throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: failed to open " + arg_parser.get<std::string>(FILELIST) + " for reading");
+      throw lbann_exception(std::string{} + __FILE__ + " " +
+                            std::to_string(__LINE__) + " :: failed to open " +
+                            arg_parser.get<std::string>(FILELIST) +
+                            " for reading");
     }
     std::string line;
     while (getline(in, line)) {
diff --git a/model_zoo/jag_utils/compute_hydra_normalization.cpp b/model_zoo/jag_utils/compute_hydra_normalization.cpp
index 2b724cf1fe2..d56c2af7fd3 100644
--- a/model_zoo/jag_utils/compute_hydra_normalization.cpp
+++ b/model_zoo/jag_utils/compute_hydra_normalization.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -55,21 +55,21 @@ int main(int argc, char *argv[]) {
   bool master = comm->am_world_master();
   const int rank = comm->get_rank_in_world();
 
-    auto& arg_parser = global_argument_parser();
-    construct_std_options();
-    construct_jag_options();
-		try {
-			arg_parser.parse(argc, argv);
-		}
-		catch (std::exception const& e) {
-			auto guessed_rank = guess_global_rank();
-			if (guessed_rank <= 0)
-				// Cannot call `El::ReportException` because MPI hasn't been
-				// initialized yet.
-				std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
-									<< e.what() << "\n\nProcess terminating." << std::endl;
-			std::terminate();
-		}
+  auto& arg_parser = global_argument_parser();
+  construct_std_options();
+  construct_jag_options();
+  try {
+    arg_parser.parse(argc, argv);
+  }
+  catch (std::exception const& e) {
+    auto guessed_rank = guess_global_rank();
+    if (guessed_rank <= 0)
+      // Cannot call `El::ReportException` because MPI hasn't been
+      // initialized yet.
+      std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
+                << e.what() << "\n\nProcess terminating." << std::endl;
+    std::terminate();
+  }
 
     ofstream out("normalize.txt");
     if (!out) {
@@ -111,7 +111,8 @@ int main(int argc, char *argv[]) {
 
     ifstream in(arg_parser.get<std::string>(FILELIST).c_str());
     if (!in) {
-      LBANN_ERROR("failed to open " + arg_parser.get<std::string>(FILELIST) + " for reading");
+      LBANN_ERROR("failed to open " + arg_parser.get<std::string>(FILELIST) +
+                  " for reading");
     }
 
     size_t hhh = 0;
diff --git a/model_zoo/jag_utils/compute_min_max_images.cpp b/model_zoo/jag_utils/compute_min_max_images.cpp
index 85840554de9..95c4abf22c5 100644
--- a/model_zoo/jag_utils/compute_min_max_images.cpp
+++ b/model_zoo/jag_utils/compute_min_max_images.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -53,18 +53,18 @@ int main(int argc, char *argv[]) {
     auto& arg_parser = global_argument_parser();
     construct_std_options();
     construct_jag_options();
-		try {
-				arg_parser.parse(argc, argv);
-		}
-		catch (std::exception const& e) {
-			auto guessed_rank = guess_global_rank();
-			if (guessed_rank <= 0)
-				// Cannot call `El::ReportException` because MPI hasn't been
-				// initialized yet.
-				std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
-									<< e.what() << "\n\nProcess terminating." << std::endl;
-			std::terminate();
-		}
+    try {
+      arg_parser.parse(argc, argv);
+    }
+    catch (std::exception const& e) {
+      auto guessed_rank = guess_global_rank();
+      if (guessed_rank <= 0)
+        // Cannot call `El::ReportException` because MPI hasn't been
+        // initialized yet.
+        std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
+                  << e.what() << "\n\nProcess terminating." << std::endl;
+      std::terminate();
+    }
 
     if (arg_parser.get<std::string>(FILELIST) == "" ||
         arg_parser.get<std::string>(OUTPUT_DIR) == "") {
@@ -91,7 +91,10 @@ int main(int argc, char *argv[]) {
       std::stringstream s;
       std::ifstream in(arg_parser.get<std::string>(FILELIST).c_str());
       if (!in) {
-        throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: failed to open " + arg_parser.get<std::string>(FILELIST) + " for reading");
+        throw lbann_exception(std::string{} + __FILE__ + " " +
+                              std::to_string(__LINE__) + " :: failed to open " +
+                              arg_parser.get<std::string>(FILELIST) +
+                              " for reading");
       }
       std::string line;
       while (getline(in, line)) {
diff --git a/model_zoo/jag_utils/compute_per_channel_image_avg_min_max.cpp b/model_zoo/jag_utils/compute_per_channel_image_avg_min_max.cpp
index d1c0a1f5372..559d9d7351b 100644
--- a/model_zoo/jag_utils/compute_per_channel_image_avg_min_max.cpp
+++ b/model_zoo/jag_utils/compute_per_channel_image_avg_min_max.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -52,18 +52,18 @@ int main(int argc, char *argv[]) {
     auto& arg_parser = global_argument_parser();
     construct_std_options();
     construct_jag_options();
-		try {
-			arg_parser.parse(argc, argv);
-		}
-		catch (std::exception const& e) {
-			auto guessed_rank = guess_global_rank();
-			if (guessed_rank <= 0)
-				// Cannot call `El::ReportException` because MPI hasn't been
-				// initialized yet.
-				std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
-									<< e.what() << "\n\nProcess terminating." << std::endl;
-			std::terminate();
-		}
+    try {
+      arg_parser.parse(argc, argv);
+    }
+    catch (std::exception const& e) {
+      auto guessed_rank = guess_global_rank();
+      if (guessed_rank <= 0)
+        // Cannot call `El::ReportException` because MPI hasn't been
+        // initialized yet.
+        std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
+                  << e.what() << "\n\nProcess terminating." << std::endl;
+      std::terminate();
+    }
 
     if (arg_parser.get<std::string>(FILELIST) == "") {
       if (master) {
@@ -78,7 +78,10 @@ int main(int argc, char *argv[]) {
       std::stringstream s;
       std::ifstream in(arg_parser.get<std::string>(FILELIST).c_str());
       if (!in) {
-        throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: failed to open " + arg_parser.get<std::string>(FILELIST) + " for reading");
+        throw lbann_exception(std::string{} + __FILE__ + " " +
+                              std::to_string(__LINE__) + " :: failed to open " +
+                              arg_parser.get<std::string>(FILELIST) +
+                              " for reading");
       }
       std::string line;
       while (getline(in, line)) {
diff --git a/model_zoo/jag_utils/convert.cpp b/model_zoo/jag_utils/convert.cpp
index d23119dc112..8c930ae306f 100644
--- a/model_zoo/jag_utils/convert.cpp
+++ b/model_zoo/jag_utils/convert.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -61,7 +61,7 @@ int main(int argc, char *argv[]) {
   auto& arg_parser = global_argument_parser();
   construct_std_options();
   construct_jag_options();
-	try {
+  try {
     arg_parser.parse(argc, argv);
   }
   catch (std::exception const& e) {
diff --git a/model_zoo/jag_utils/convert_npz_to_conduit.cpp b/model_zoo/jag_utils/convert_npz_to_conduit.cpp
index d83f44542ee..32f3664c929 100644
--- a/model_zoo/jag_utils/convert_npz_to_conduit.cpp
+++ b/model_zoo/jag_utils/convert_npz_to_conduit.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -49,18 +49,18 @@ int main(int argc, char *argv[]) {
     auto& arg_parser = global_argument_parser();
     construct_std_options();
     construct_jag_options();
-		try {
-			arg_parser.parse(argc, argv);
-		}
-		catch (std::exception const& e) {
-			auto guessed_rank = guess_global_rank();
-			if (guessed_rank <= 0)
-				// Cannot call `El::ReportException` because MPI hasn't been
-				// initialized yet.
-				std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
-									<< e.what() << "\n\nProcess terminating." << std::endl;
-			std::terminate();
-		}
+    try {
+      arg_parser.parse(argc, argv);
+    }
+    catch (std::exception const& e) {
+      auto guessed_rank = guess_global_rank();
+      if (guessed_rank <= 0)
+        // Cannot call `El::ReportException` because MPI hasn't been
+        // initialized yet.
+        std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
+                  << e.what() << "\n\nProcess terminating." << std::endl;
+      std::terminate();
+    }
 
     if (arg_parser.get<std::string>(FILELIST) == "") {
       if (master) {
@@ -112,7 +112,7 @@ int main(int argc, char *argv[]) {
 
           if (name == "frames") {
             //pass
-          } 
+          }
 
           else if (name == "bbs") {
             float *data = a[name].data<float>();
@@ -120,7 +120,7 @@ int main(int argc, char *argv[]) {
             node[LBANN_DATA_ID_STR(sample_index) + "/" + name + "/data"].set(data + offset, num_words[name]);
             node[LBANN_DATA_ID_STR(sample_index) + "/" + name + "/shape"].set(shapes[name]);
             node[LBANN_DATA_ID_STR(sample_index) + "/" + name + "/size"].set(num_words[name]);
-          } 
+          }
 
           else { // rots, states, tilts, density_sig1, probs
             size_t offset = sample_index*num_words[name];
@@ -155,4 +155,3 @@ int main(int argc, char *argv[]) {
   // Clean up
   return EXIT_SUCCESS;
 }
-
diff --git a/model_zoo/jag_utils/detect_corruption.cpp b/model_zoo/jag_utils/detect_corruption.cpp
index 175b669f6e1..8bd61366511 100644
--- a/model_zoo/jag_utils/detect_corruption.cpp
+++ b/model_zoo/jag_utils/detect_corruption.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/model_zoo/jag_utils/extract_random_samples.cpp b/model_zoo/jag_utils/extract_random_samples.cpp
index 277437c0fa9..b3ed834e67d 100644
--- a/model_zoo/jag_utils/extract_random_samples.cpp
+++ b/model_zoo/jag_utils/extract_random_samples.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -123,7 +123,7 @@ int main(int argc, char *argv[]) {
     }
     comm->world_broadcast<int>(0, &num_output_dirs, 1);
     // TODO MRW
-    //opts->set_option("num_output_dirs", num_output_dirs);
+    // opts->set_option("num_output_dirs", num_output_dirs);
 
     // get the set of global indices for the samples in our extracted set
     std::set<int> indices;
@@ -188,17 +188,25 @@ void get_random_sample_indices(const std::unordered_set<int> &exclude, std::set<
 }
 
 std::string usage() {
-    std::string u =
-      "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
-      "usage: extract_random_samples --index_fn=<string> --num_samples=<int> --output_base_dir=<string> --random_seed=<int> [ --exclude=<string> ] [ --num_samples_per_output_file=<int> ]\n"
-      "where: --index_fn is the output file from the build_index executable\n"
-      "       --num_samples is the number of random samples to be extracted\n"
-      "       --output_base_dir will be created if it doesn't exist\n"
-      "       --exclude is an optional filename containing IDs of samples that should not appear in the output\n"
-      "       --random_seed is required to ensure all procs generate identical random sample indices.\n"
-      "       --num_samples_per_file is number of samples per output file; default is 1000 (a maximum of one output file per processor may contain fewer)\n"
-      "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n\n";
-    return u;
+  std::string u =
+    "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+    "+++\n"
+    "usage: extract_random_samples --index_fn=<string> --num_samples=<int> "
+    "--output_base_dir=<string> --random_seed=<int> [ --exclude=<string> ] [ "
+    "--num_samples_per_output_file=<int> ]\n"
+    "where: --index_fn is the output file from the build_index executable\n"
+    "       --num_samples is the number of random samples to be extracted\n"
+    "       --output_base_dir will be created if it doesn't exist\n"
+    "       --exclude is an optional filename containing IDs of samples that "
+    "should not appear in the output\n"
+    "       --random_seed is required to ensure all procs generate identical "
+    "random sample indices.\n"
+    "       --num_samples_per_file is number of samples per output file; "
+    "default is 1000 (a maximum of one output file per processor may contain "
+    "fewer)\n"
+    "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+    "+++\n\n";
+  return u;
 }
 
 void build_exclusion_set(std::unordered_set<int> &exclude) {
diff --git a/model_zoo/jag_utils/generate_corrupt_samples.cpp b/model_zoo/jag_utils/generate_corrupt_samples.cpp
index 5bc498b599a..d0a162195d3 100644
--- a/model_zoo/jag_utils/generate_corrupt_samples.cpp
+++ b/model_zoo/jag_utils/generate_corrupt_samples.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -52,21 +52,21 @@ int main(int argc, char *argv[]) {
 
   std::stringstream err;
 
-	auto& arg_parser = global_argument_parser();
-	construct_std_options();
-	construct_jag_options();
-	try {
-		arg_parser.parse(argc, argv);
-	}
-	catch (std::exception const& e) {
-		auto guessed_rank = guess_global_rank();
-		if (guessed_rank <= 0)
-			// Cannot call `El::ReportException` because MPI hasn't been
-			// initialized yet.
-			std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
-								<< e.what() << "\n\nProcess terminating." << std::endl;
-		std::terminate();
-	}
+  auto& arg_parser = global_argument_parser();
+  construct_std_options();
+  construct_jag_options();
+  try {
+    arg_parser.parse(argc, argv);
+  }
+  catch (std::exception const& e) {
+    auto guessed_rank = guess_global_rank();
+    if (guessed_rank <= 0)
+      // Cannot call `El::ReportException` because MPI hasn't been
+      // initialized yet.
+      std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
+                << e.what() << "\n\nProcess terminating." << std::endl;
+    std::terminate();
+  }
 
   // sanity check invocation
   if (arg_parser.get<std::string>(FILELIST) == "") {
diff --git a/model_zoo/jag_utils/load_balance.cpp b/model_zoo/jag_utils/load_balance.cpp
index 37d084b29fa..f9628414cef 100644
--- a/model_zoo/jag_utils/load_balance.cpp
+++ b/model_zoo/jag_utils/load_balance.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -100,7 +100,10 @@ int main(int argc, char *argv[]) {
       std::stringstream s;
       std::ifstream in(arg_parser.get<std::string>(FILELIST).c_str());
       if (!in) {
-          throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: failed to open " + arg_parser.get<std::string>(FILELIST) + " for reading");
+        throw lbann_exception(std::string{} + __FILE__ + " " +
+                              std::to_string(__LINE__) + " :: failed to open " +
+                              arg_parser.get<std::string>(FILELIST) +
+                              " for reading");
       }
       std::string line;
       while (getline(in, line)) {
diff --git a/model_zoo/jag_utils/load_bundle2raw.cpp b/model_zoo/jag_utils/load_bundle2raw.cpp
index 9d28b788c77..96c1dc43f2a 100644
--- a/model_zoo/jag_utils/load_bundle2raw.cpp
+++ b/model_zoo/jag_utils/load_bundle2raw.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/model_zoo/jag_utils/select_samples.cpp b/model_zoo/jag_utils/select_samples.cpp
index 7c78d255b27..f9914752eb3 100644
--- a/model_zoo/jag_utils/select_samples.cpp
+++ b/model_zoo/jag_utils/select_samples.cpp
@@ -164,13 +164,13 @@ int main(int argc, char **argv) {
 void check_cmd_line() {
   auto& arg_parser = global_argument_parser();
   stringstream err;
-  if (! (arg_parser.get<std::string>(INDEX_FN) != "" &&
-         arg_parser.get<std::string>(MAPPING_FN) != "" &&
-         arg_parser.get<int>(NUM_SAMPLES_PER_LIST) != -1 &&
-         arg_parser.get<int>(NUM_LISTS) != -1 &&
-         arg_parser.get<int>(RANDOM_SEED) != -1 &&
-         arg_parser.get<std::string>(OUTPUT_DIR) != "" &&
-         arg_parser.get<std::string>(OUTPUT_BASE_FN) != "")) {
+  if (!(arg_parser.get<std::string>(INDEX_FN) != "" &&
+        arg_parser.get<std::string>(MAPPING_FN) != "" &&
+        arg_parser.get<int>(NUM_SAMPLES_PER_LIST) != -1 &&
+        arg_parser.get<int>(NUM_LISTS) != -1 &&
+        arg_parser.get<int>(RANDOM_SEED) != -1 &&
+        arg_parser.get<std::string>(OUTPUT_DIR) != "" &&
+        arg_parser.get<std::string>(OUTPUT_BASE_FN) != "")) {
     cout << help_msg();
     if (arg_parser.get<std::string>(INDEX_FN) == "") {
       cout << "missing --index_fn=<string> \n";
diff --git a/model_zoo/jag_utils/test_conduit_hdf5.cpp b/model_zoo/jag_utils/test_conduit_hdf5.cpp
index 84cefbf8362..09db2a23818 100644
--- a/model_zoo/jag_utils/test_conduit_hdf5.cpp
+++ b/model_zoo/jag_utils/test_conduit_hdf5.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -56,21 +56,21 @@ int main(int argc, char *argv[]) {
     }
   }
 
-	auto& arg_parser = global_argument_parser();
-	construct_std_options();
-	construct_jag_options();
-	try {
-		arg_parser.parse(argc, argv);
-	}
-	catch (std::exception const& e) {
-		auto guessed_rank = guess_global_rank();
-		if (guessed_rank <= 0)
-			// Cannot call `El::ReportException` because MPI hasn't been
-			// initialized yet.
-			std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
-								<< e.what() << "\n\nProcess terminating." << std::endl;
-		std::terminate();
-	}
+  auto& arg_parser = global_argument_parser();
+  construct_std_options();
+  construct_jag_options();
+  try {
+    arg_parser.parse(argc, argv);
+  }
+  catch (std::exception const& e) {
+    auto guessed_rank = guess_global_rank();
+    if (guessed_rank <= 0)
+      // Cannot call `El::ReportException` because MPI hasn't been
+      // initialized yet.
+      std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
+                << e.what() << "\n\nProcess terminating." << std::endl;
+    std::terminate();
+  }
 
   // sanity check invocation
   if (arg_parser.get<std::string>(FILENAME) == "") {
@@ -79,54 +79,54 @@ int main(int argc, char *argv[]) {
     }
   }
 
-    const std::string filename = arg_parser.get<std::string>(FILENAME);
-
-    // get lists of inputs and scalars to read from file
-    std::unordered_set<std::string> input_names;
-    std::unordered_set<std::string> scalar_names;
-    std::unordered_set<std::string> image_names;
-    get_input_names(input_names);
-    get_scalar_names(scalar_names);
-    get_image_names(image_names);
-
-    hid_t hdf5_file_hnd;
-    std::string key;
-    conduit::Node n_ok;
-    conduit::Node tmp;
-    std::cerr << "opening for read: " << filename << "\n";
-    hdf5_file_hnd = conduit::relay::io::hdf5_open_file_for_read( filename.c_str() );
-
-    std::vector<std::string> cnames;
-    std::cerr << "calling: hdf5_group_list_child_names\n";
-    conduit::relay::io::hdf5_group_list_child_names(hdf5_file_hnd, "/", cnames);
-    std::cerr << "file contains " << cnames.size() << " samples\n";
-
-    for (size_t i=0; i<cnames.size(); i++) {
-
-      key = "/" + cnames[i] + "/performance/success";
-      std::cerr << "calling: hdf5_read for key: " << key << "\n";
-      conduit::relay::io::hdf5_read(hdf5_file_hnd, key, n_ok);
-
-      int success = n_ok.to_int64();
-      if (success == 1) {
-        for (auto t : input_names) {
-            key = cnames[i] + "/inputs/" + t;
-            std::cerr << "calling: hdf5_read for key: " << key << "\n";
-            conduit::relay::io::hdf5_read(hdf5_file_hnd, key, tmp);
-        }
-
-        for (auto t : scalar_names) {
-            key = cnames[i] + "/outputs/scalars/" + t;
-            std::cerr << "calling: hdf5_read for key: " << key << "\n";
-            conduit::relay::io::hdf5_read(hdf5_file_hnd, key, tmp);
-        }
-
-        for (auto t : image_names) {
-            key = cnames[i] + "/outputs/images/" + t;
-            std::cerr << "calling: hdf5_read for key: " << key << "\n";
-            conduit::relay::io::hdf5_read(hdf5_file_hnd, key, tmp);
-        }
+  const std::string filename = arg_parser.get<std::string>(FILENAME);
+
+  // get lists of inputs and scalars to read from file
+  std::unordered_set<std::string> input_names;
+  std::unordered_set<std::string> scalar_names;
+  std::unordered_set<std::string> image_names;
+  get_input_names(input_names);
+  get_scalar_names(scalar_names);
+  get_image_names(image_names);
+
+  hid_t hdf5_file_hnd;
+  std::string key;
+  conduit::Node n_ok;
+  conduit::Node tmp;
+  std::cerr << "opening for read: " << filename << "\n";
+  hdf5_file_hnd = conduit::relay::io::hdf5_open_file_for_read(filename.c_str());
+
+  std::vector<std::string> cnames;
+  std::cerr << "calling: hdf5_group_list_child_names\n";
+  conduit::relay::io::hdf5_group_list_child_names(hdf5_file_hnd, "/", cnames);
+  std::cerr << "file contains " << cnames.size() << " samples\n";
+
+  for (size_t i = 0; i < cnames.size(); i++) {
+
+    key = "/" + cnames[i] + "/performance/success";
+    std::cerr << "calling: hdf5_read for key: " << key << "\n";
+    conduit::relay::io::hdf5_read(hdf5_file_hnd, key, n_ok);
+
+    int success = n_ok.to_int64();
+    if (success == 1) {
+      for (auto t : input_names) {
+        key = cnames[i] + "/inputs/" + t;
+        std::cerr << "calling: hdf5_read for key: " << key << "\n";
+        conduit::relay::io::hdf5_read(hdf5_file_hnd, key, tmp);
+      }
+
+      for (auto t : scalar_names) {
+        key = cnames[i] + "/outputs/scalars/" + t;
+        std::cerr << "calling: hdf5_read for key: " << key << "\n";
+        conduit::relay::io::hdf5_read(hdf5_file_hnd, key, tmp);
       }
+
+      for (auto t : image_names) {
+        key = cnames[i] + "/outputs/images/" + t;
+        std::cerr << "calling: hdf5_read for key: " << key << "\n";
+        conduit::relay::io::hdf5_read(hdf5_file_hnd, key, tmp);
+      }
+    }
     }
 
   return 0;
diff --git a/model_zoo/jag_utils/test_reading_speed.cpp b/model_zoo/jag_utils/test_reading_speed.cpp
index 5f9160edcf9..bdca301fbbf 100644
--- a/model_zoo/jag_utils/test_reading_speed.cpp
+++ b/model_zoo/jag_utils/test_reading_speed.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -58,21 +58,21 @@ void test_jag(string filename);
 int main(int argc, char *argv[]) {
   world_comm_ptr comm = initialize(argc, argv);
 
-	auto& arg_parser = global_argument_parser();
-	construct_std_options();
-	construct_jag_options();
-	try {
-		arg_parser.parse(argc, argv);
-	}
-	catch (std::exception const& e) {
-		auto guessed_rank = guess_global_rank();
-		if (guessed_rank <= 0)
-			// Cannot call `El::ReportException` because MPI hasn't been
-			// initialized yet.
-			std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
-								<< e.what() << "\n\nProcess terminating." << std::endl;
-		std::terminate();
-	}
+  auto& arg_parser = global_argument_parser();
+  construct_std_options();
+  construct_jag_options();
+  try {
+    arg_parser.parse(argc, argv);
+  }
+  catch (std::exception const& e) {
+    auto guessed_rank = guess_global_rank();
+    if (guessed_rank <= 0)
+      // Cannot call `El::ReportException` because MPI hasn't been
+      // initialized yet.
+      std::cerr << "Error during argument parsing:\n\ne.what():\n\n  "
+                << e.what() << "\n\nProcess terminating." << std::endl;
+    std::terminate();
+  }
 
   if (arg_parser.get<std::string>(FILELIST) == "") {
     LBANN_ERROR("usage: test_speed_hydra_ --filelist=<string> --jag");
@@ -80,7 +80,8 @@ int main(int argc, char *argv[]) {
 
   if (arg_parser.get<bool>(JAG)) {
     test_jag(arg_parser.get<std::string>(FILELIST));
-  } else {
+  }
+  else {
     test_hydra(arg_parser.get<std::string>(FILELIST));
   }
   return EXIT_SUCCESS;
diff --git a/model_zoo/lbann.cpp b/model_zoo/lbann.cpp
index 49ce7004965..abedd83ac5d 100644
--- a/model_zoo/lbann.cpp
+++ b/model_zoo/lbann.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -152,8 +152,7 @@ int main(int argc, char* argv[])
     lbann_data::Trainer* pb_trainer = pb.mutable_trainer();
 
     // Construct the trainer
-    auto& trainer =
-      construct_trainer(comm.get(), pb_trainer, pb);
+    auto& trainer = construct_trainer(comm.get(), pb_trainer, pb);
 
     thread_pool& io_thread_pool = trainer.get_io_thread_pool();
 
diff --git a/model_zoo/lbann_aecycgan.cpp b/model_zoo/lbann_aecycgan.cpp
index 15a44d4e1b1..5b6421ca234 100644
--- a/model_zoo/lbann_aecycgan.cpp
+++ b/model_zoo/lbann_aecycgan.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -114,27 +114,42 @@ int main(int argc, char *argv[]) {
       training_dr_linearized_data_size = dr->get_linearized_data_size();
     }
 
-    auto model_1 = build_model_from_prototext(argc, argv, pb_trainer, *(pbs[0]),
-                                              comm.get(), io_thread_pool,
-                                              trainer.get_callbacks_with_ownership(),
-                                              training_dr_linearized_data_size); //ae
+    auto model_1 =
+      build_model_from_prototext(argc,
+                                 argv,
+                                 pb_trainer,
+                                 *(pbs[0]),
+                                 comm.get(),
+                                 io_thread_pool,
+                                 trainer.get_callbacks_with_ownership(),
+                                 training_dr_linearized_data_size); // ae
     std::unique_ptr<model>
       model_2, //cycgan
       model_3; //ae+cycgan
 
 
     if (pbs.size() > 1) {
-      model_2 = build_model_from_prototext(argc, argv, pb_trainer, *(pbs[1]),
-                                           comm.get(), io_thread_pool,
-                                           trainer.get_callbacks_with_ownership(),
-                                           training_dr_linearized_data_size);
+      model_2 =
+        build_model_from_prototext(argc,
+                                   argv,
+                                   pb_trainer,
+                                   *(pbs[1]),
+                                   comm.get(),
+                                   io_thread_pool,
+                                   trainer.get_callbacks_with_ownership(),
+                                   training_dr_linearized_data_size);
     }
 
     if (pbs.size() > 2) {
-      model_3 = build_model_from_prototext(argc, argv, pb_trainer, *(pbs[2]),
-                                           comm.get(), io_thread_pool,
-                                           trainer.get_callbacks_with_ownership(),
-                                           training_dr_linearized_data_size);
+      model_3 =
+        build_model_from_prototext(argc,
+                                   argv,
+                                   pb_trainer,
+                                   *(pbs[2]),
+                                   comm.get(),
+                                   io_thread_pool,
+                                   trainer.get_callbacks_with_ownership(),
+                                   training_dr_linearized_data_size);
     }
 
 
diff --git a/model_zoo/lbann_cycgan.cpp b/model_zoo/lbann_cycgan.cpp
index cfac897008d..919a7e90839 100644
--- a/model_zoo/lbann_cycgan.cpp
+++ b/model_zoo/lbann_cycgan.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -102,8 +102,8 @@ int main(int argc, char *argv[]) {
     }
 
     if (!arg_parser.get<bool>(DISABLE_SIGNAL_HANDLER)) {
-      std::string file_base = (arg_parser.get<bool>(STACK_TRACE_TO_FILE) ?
-                               "stack_trace" : "");
+      std::string file_base =
+        (arg_parser.get<bool>(STACK_TRACE_TO_FILE) ? "stack_trace" : "");
       stack_trace::register_signal_handler(file_base);
     }
 
@@ -132,11 +132,16 @@ int main(int argc, char *argv[]) {
       training_dr_linearized_data_size = dr->get_linearized_data_size();
     }
 
-    auto model_1 = build_model_from_prototext(argc, argv, pb_trainer, *(pbs[0]),
-                                              comm.get(), io_thread_pool,
-                                              trainer.get_callbacks_with_ownership(),
-                                              training_dr_linearized_data_size); //D1 solver
-    //hack, overide model name to make reporting easy, what can break?"
+    auto model_1 =
+      build_model_from_prototext(argc,
+                                 argv,
+                                 pb_trainer,
+                                 *(pbs[0]),
+                                 comm.get(),
+                                 io_thread_pool,
+                                 trainer.get_callbacks_with_ownership(),
+                                 training_dr_linearized_data_size); // D1 solver
+    // hack, override model name to make reporting easy, what can break?
     std::unique_ptr<model> model_2, //G1 solver
       model_3, //G2 solver
 
@@ -145,31 +150,51 @@ int main(int argc, char *argv[]) {
       ae_cycgan_model; //contain layer(s) from (cyc)GAN
 
     if (pbs.size() > 1) {
-      model_2 = build_model_from_prototext(argc, argv, pb_trainer, *(pbs[1]),
-                                           comm.get(), io_thread_pool,
-                                           trainer.get_callbacks_with_ownership(),
-                                           training_dr_linearized_data_size);
+      model_2 =
+        build_model_from_prototext(argc,
+                                   argv,
+                                   pb_trainer,
+                                   *(pbs[1]),
+                                   comm.get(),
+                                   io_thread_pool,
+                                   trainer.get_callbacks_with_ownership(),
+                                   training_dr_linearized_data_size);
     }
 
     if (pbs.size() > 2) {
-      model_3 = build_model_from_prototext(argc, argv, pb_trainer, *(pbs[2]),
-                                           comm.get(), io_thread_pool,
-                                           trainer.get_callbacks_with_ownership(),
-                                           training_dr_linearized_data_size);
+      model_3 =
+        build_model_from_prototext(argc,
+                                   argv,
+                                   pb_trainer,
+                                   *(pbs[2]),
+                                   comm.get(),
+                                   io_thread_pool,
+                                   trainer.get_callbacks_with_ownership(),
+                                   training_dr_linearized_data_size);
     }
 
     if (pbs.size() > 3) {
-      ae_model = build_model_from_prototext(argc, argv, pb_trainer, *(pbs[3]),
-                                            comm.get(), io_thread_pool,
-                                            trainer.get_callbacks_with_ownership(),
-                                            training_dr_linearized_data_size);
+      ae_model =
+        build_model_from_prototext(argc,
+                                   argv,
+                                   pb_trainer,
+                                   *(pbs[3]),
+                                   comm.get(),
+                                   io_thread_pool,
+                                   trainer.get_callbacks_with_ownership(),
+                                   training_dr_linearized_data_size);
     }
 
     if (pbs.size() > 4) {
-      ae_cycgan_model = build_model_from_prototext(argc, argv, pb_trainer, *(pbs[4]),
-                                                   comm.get(), io_thread_pool,
-                                                   trainer.get_callbacks_with_ownership(),
-                                                   training_dr_linearized_data_size);
+      ae_cycgan_model =
+        build_model_from_prototext(argc,
+                                   argv,
+                                   pb_trainer,
+                                   *(pbs[4]),
+                                   comm.get(),
+                                   io_thread_pool,
+                                   trainer.get_callbacks_with_ownership(),
+                                   training_dr_linearized_data_size);
     }
 
     const lbann_data::Model pb_model = pbs[0]->model();
diff --git a/model_zoo/lbann_gan.cpp b/model_zoo/lbann_gan.cpp
index 26f49da6048..e2e0e6338c6 100644
--- a/model_zoo/lbann_gan.cpp
+++ b/model_zoo/lbann_gan.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -114,14 +114,26 @@ int main(int argc, char *argv[]) {
       training_dr_linearized_data_size = dr->get_linearized_data_size();
     }
 
-    auto model_1 = build_model_from_prototext(argc, argv, pb_trainer, *(pbs[0]), comm.get(), io_thread_pool,
-                                              trainer.get_callbacks_with_ownership(),
-                                              training_dr_linearized_data_size); //discriminator model
+    auto model_1 = build_model_from_prototext(
+      argc,
+      argv,
+      pb_trainer,
+      *(pbs[0]),
+      comm.get(),
+      io_thread_pool,
+      trainer.get_callbacks_with_ownership(),
+      training_dr_linearized_data_size);      // discriminator model
     std::unique_ptr<model> model_2 = nullptr; //adversarial model
     if (pbs.size() > 1) {
-      model_2 = build_model_from_prototext(argc, argv, pb_trainer, *(pbs[1]), comm.get(), io_thread_pool,
-                                           trainer.get_callbacks_with_ownership(),
-                                           training_dr_linearized_data_size);
+      model_2 =
+        build_model_from_prototext(argc,
+                                   argv,
+                                   pb_trainer,
+                                   *(pbs[1]),
+                                   comm.get(),
+                                   io_thread_pool,
+                                   trainer.get_callbacks_with_ownership(),
+                                   training_dr_linearized_data_size);
     }
 
     const lbann_data::Model pb_model = pbs[0]->model();
diff --git a/model_zoo/lbann_help.cpp b/model_zoo/lbann_help.cpp
index 69eae1c3ccd..05574898ca8 100644
--- a/model_zoo/lbann_help.cpp
+++ b/model_zoo/lbann_help.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/model_zoo/lbann_inf.cpp b/model_zoo/lbann_inf.cpp
index d1af807c48f..63c32207cb9 100644
--- a/model_zoo/lbann_inf.cpp
+++ b/model_zoo/lbann_inf.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -92,8 +92,12 @@ int main(int argc, char *argv[]) {
     std::vector<std::unique_ptr<model>> models;
     for(auto&& pb_model : pbs) {
       models.emplace_back(
-        build_model_from_prototext(argc, argv, pb_trainer, *pb_model,
-                                   comm.get(), io_thread_pool,
+        build_model_from_prototext(argc,
+                                   argv,
+                                   pb_trainer,
+                                   *pb_model,
+                                   comm.get(),
+                                   io_thread_pool,
                                    trainer.get_callbacks_with_ownership(),
                                    training_dr_linearized_data_size));
     }
diff --git a/model_zoo/tests/conduit_timing_test.cpp b/model_zoo/tests/conduit_timing_test.cpp
index 6cfd5edfe08..3d328f8f89a 100644
--- a/model_zoo/tests/conduit_timing_test.cpp
+++ b/model_zoo/tests/conduit_timing_test.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/model_zoo/tests/model_jag_single_layer_ae.prototext b/model_zoo/tests/model_jag_single_layer_ae.prototext
index e5c42675c72..d8f593ba7bb 100644
--- a/model_zoo/tests/model_jag_single_layer_ae.prototext
+++ b/model_zoo/tests/model_jag_single_layer_ae.prototext
@@ -70,7 +70,7 @@ model {
   # Data
   layer {
     input {
-      target_mode: "N/A"
+      data_field: "samples"
     }
     name: "data"
     data_layout: "data_parallel"
diff --git a/model_zoo/tests/model_lenet_mnist_ckpt.prototext b/model_zoo/tests/model_lenet_mnist_ckpt.prototext
index a3fcd04338b..56704f0e2b0 100644
--- a/model_zoo/tests/model_lenet_mnist_ckpt.prototext
+++ b/model_zoo/tests/model_lenet_mnist_ckpt.prototext
@@ -72,24 +72,19 @@ model {
   ###################################################
 
   layer {
-    name: "data"
-    children: "image label"
+    name: "image"
     data_layout: "data_parallel"
     input {
-      target_mode: "classification"
+      data_field: "samples"
     }
   }
+
   layer {
-    parents: "data"
-    name: "image"
-    data_layout: "data_parallel"
-    split {}
-  }
-  layer {
-    parents: "data"
     name: "label"
     data_layout: "data_parallel"
-    split {}
+    input {
+      data_field: "labels"
+    }
   }
 
   layer {
diff --git a/model_zoo/tests/model_lenet_mnist_dist_ckpt.prototext b/model_zoo/tests/model_lenet_mnist_dist_ckpt.prototext
index e4409f22d4b..5c3f58be7be 100644
--- a/model_zoo/tests/model_lenet_mnist_dist_ckpt.prototext
+++ b/model_zoo/tests/model_lenet_mnist_dist_ckpt.prototext
@@ -75,24 +75,19 @@ model {
   ###################################################
 
   layer {
-    name: "data"
-    children: "image label"
+    name: "image"
     data_layout: "data_parallel"
     input {
-      target_mode: "classification"
+      data_field: "samples"
     }
   }
+
   layer {
-    parents: "data"
-    name: "image"
-    data_layout: "data_parallel"
-    split {}
-  }
-  layer {
-    parents: "data"
     name: "label"
     data_layout: "data_parallel"
-    split {}
+    input {
+      data_field: "labels"
+    }
   }
 
   layer {
diff --git a/model_zoo/tests/model_lenet_mnist_lbann2ckpt.prototext b/model_zoo/tests/model_lenet_mnist_lbann2ckpt.prototext
index a49e1e94b10..92458aafc39 100644
--- a/model_zoo/tests/model_lenet_mnist_lbann2ckpt.prototext
+++ b/model_zoo/tests/model_lenet_mnist_lbann2ckpt.prototext
@@ -70,24 +70,19 @@ model {
   ###################################################
 
   layer {
-    name: "data"
-    children: "image label"
+    name: "image"
     data_layout: "data_parallel"
     input {
-      target_mode: "classification"
+      data_field: "samples"
     }
   }
+
   layer {
-    parents: "data"
-    name: "image"
-    data_layout: "data_parallel"
-    split {}
-  }
-  layer {
-    parents: "data"
     name: "label"
     data_layout: "data_parallel"
-    split {}
+    input {
+      data_field: "labels"
+    }
   }
 
   layer {
diff --git a/src/callbacks/print_statistics.cpp b/src/callbacks/print_statistics.cpp
index 3ba59d6bc24..afaa3ef8066 100644
--- a/src/callbacks/print_statistics.cpp
+++ b/src/callbacks/print_statistics.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -184,7 +184,8 @@ void print_statistics::report_results(model *m) {
     const int num_trainers = comm->get_num_trainers();
 
     auto& arg_parser = global_argument_parser();
-    bool allow_global_statistics = arg_parser.get<bool>(LTFB_ALLOW_GLOBAL_STATISTICS);
+    bool allow_global_statistics =
+      arg_parser.get<bool>(LTFB_ALLOW_GLOBAL_STATISTICS);
     std::stringstream report;
 
     // Report objective function value
diff --git a/src/callbacks/timer.cpp b/src/callbacks/timer.cpp
index b9da0fef6e8..4816b0b77cd 100644
--- a/src/callbacks/timer.cpp
+++ b/src/callbacks/timer.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -143,7 +143,8 @@ void timer::timing_end(model& m) {
   if (comm.am_trainer_master()) {
 
     auto& arg_parser = global_argument_parser();
-    bool allow_global_statistics = arg_parser.get<bool>(LTFB_ALLOW_GLOBAL_STATISTICS);
+    bool allow_global_statistics =
+      arg_parser.get<bool>(LTFB_ALLOW_GLOBAL_STATISTICS);
     std::stringstream report;
 
     if(allow_global_statistics) {
diff --git a/src/data_coordinator/buffered_data_coordinator.cpp b/src/data_coordinator/buffered_data_coordinator.cpp
index a957f31f233..badd3fe813b 100644
--- a/src/data_coordinator/buffered_data_coordinator.cpp
+++ b/src/data_coordinator/buffered_data_coordinator.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -41,12 +41,31 @@
 namespace lbann {
 
 template <typename TensorDataType>
-void buffered_data_coordinator<TensorDataType>::setup(thread_pool& io_thread_pool, int max_mini_batch_size, std::map<execution_mode, generic_data_reader *> data_readers) {
-  data_coordinator::setup(io_thread_pool, max_mini_batch_size, data_readers);
+void buffered_data_coordinator<TensorDataType>::register_active_data_field(
+  data_field_type const data_field)
+{
+  data_coordinator::register_active_data_field(data_field);
+  // Initialize this field on every buffer of every buffer map, then size.
+  for (const data_buffer_map_t& buffer_map : m_data_buffers) {
+    for (auto& [mode, buffer] : buffer_map) {
+      buffer->initialize_buffer_for_data_field(data_field, m_comm);
+    }
+  }
+  setup_data_fields(get_trainer().get_max_mini_batch_size());
+}
+
+template <typename TensorDataType>
+void buffered_data_coordinator<TensorDataType>::setup_data_fields(
+  int max_mini_batch_size)
+{
+  if (m_active_data_fields.size() == 0) {
+    LBANN_ERROR(
+      "Models have not registered data fields with the data coordinator");
+  }
 
-  El::Int num_neurons = get_linearized_data_size();
 #ifdef LBANN_HAS_DISTCONV
   if (dc::is_cosmoflow_parallel_io_enabled()) {
+    El::Int num_neurons = get_linearized_data_size();
     num_neurons /= dc::get_number_of_io_partitions();
     // TODO: Make sure that TensorDatType is equivalent to the HDF5
     // data reader's data type (float as default).
@@ -70,23 +89,28 @@ void buffered_data_coordinator<TensorDataType>::setup(thread_pool& io_thread_poo
   if(partial_mini_batch_size > 0 && this->m_comm->get_rank_in_trainer() < partial_mini_batch_size) {
     local_mini_batch_size++;
   }
-  // generic_data_reader *data_reader = get_data_reader(mode);
-  for (const auto& buf_map : m_data_buffers) {
-    const data_buffer_map_t& buffer_map = buf_map;
-    for (const auto& b : buffer_map) {
-      observer_ptr<data_buffer<IODataType>> data_buffer = b.second.get();
-      // for(auto idt : input_data_type_iterator()) {
-      data_buffer->m_input_buffers[input_data_type::SAMPLES]->Resize(num_neurons, max_mini_batch_size);
-      if(has_labels()) {
-        data_buffer->m_input_buffers[input_data_type::LABELS]->Resize(get_linearized_label_size(), max_mini_batch_size);
-      }
-      if(has_responses()){
-        data_buffer->m_input_buffers[input_data_type::RESPONSES]->Resize(get_linearized_response_size(), max_mini_batch_size);
+
+  // Check to see if there are any data fields with unallocated buffers
+  for (auto& data_field : m_active_data_fields) {
+    for (const auto& buf_map : m_data_buffers) {
+      const data_buffer_map_t& buffer_map = buf_map;
+      for (const auto& [mode, data_buffer] : buffer_map) {
+        auto& phase_io_buffer = data_buffer->m_input_buffers[data_field];
+        // Check to see if a buffer has already been allocated.  If
+        // not, resize and zero it
+        if (phase_io_buffer->IsEmpty() || phase_io_buffer->Width() == 0 ||
+            phase_io_buffer->Height() == 0) {
+          El::Int linearized_size = get_linearized_size(data_field);
+          data_buffer->m_input_buffers[data_field]->Resize(linearized_size,
+                                                           max_mini_batch_size);
+
+          /// The amount of space needed will vary based on input layer type,
+          /// but the batch size is the maximum space necessary
+          El::Zeros_seq(data_buffer->m_indices_fetched_per_mb,
+                        local_mini_batch_size,
+                        1);
+        }
       }
-      /// The amount of space needed will vary based on input layer type,
-      /// but the batch size is the maximum space necessary
-      El::Zeros_seq(data_buffer->m_indices_fetched_per_mb, local_mini_batch_size, 1);
-      // }
     }
   }
 }
@@ -110,10 +134,12 @@ int buffered_data_coordinator<TensorDataType>::fetch_to_local_matrix(data_buffer
   }
 
   buf.m_num_samples_fetched = 0;
-  if (this->m_comm->get_rank_in_trainer() < num_parallel_readers
-      && (buf.m_input_buffers[input_data_type::SAMPLES]->LocalHeight() != 0 && buf.m_input_buffers[input_data_type::SAMPLES]->LocalWidth() != 0)) {
+  /// BVE FIXME change the guard
+  if (this->m_comm->get_rank_in_trainer() < num_parallel_readers &&
+      (buf.m_input_buffers[INPUT_DATA_TYPE_SAMPLES]->LocalHeight() != 0 &&
+       buf.m_input_buffers[INPUT_DATA_TYPE_SAMPLES]->LocalWidth() != 0)) {
     /// Create a map of the local matrices to pass into the data reader
-    std::map<input_data_type, CPUMat*> local_input_buffers;
+    std::map<data_field_type, CPUMat*> local_input_buffers;
     for(auto& b : buf.m_input_buffers) {
       local_input_buffers[b.first] = static_cast<CPUMat*>(&(b.second->Matrix()));
     }
@@ -134,9 +160,8 @@ void buffered_data_coordinator<TensorDataType>::fp_setup_data(data_buffer<IOData
 #ifdef LBANN_HAS_DISTCONV
   cur_mini_batch_size *= dc::get_number_of_io_partitions();
 #endif
-  for(auto idt : input_data_type_iterator()) {
-    auto& mat = *buffer.m_input_buffers[idt];
-    mat.Resize(mat.Height(), cur_mini_batch_size);
+  for (auto& [data_field, mat] : buffer.m_input_buffers) {
+    mat->Resize(mat->Height(), cur_mini_batch_size);
   }
 }
 
@@ -202,6 +227,12 @@ void buffered_data_coordinator<TensorDataType>::fetch_data(execution_mode mode)
 
 template <typename TensorDataType>
 bool buffered_data_coordinator<TensorDataType>::epoch_complete(execution_mode mode) {
+  // Use the predetermined size of the mini-batch to set the current
+  // batch size for the neural network
+  int num_samples_in_batch = get_current_mini_batch_size(mode);
+  // BVE When we finish the epoch we can increment the number of
+  // samples that have been processed
+  update_num_samples_processed(mode, num_samples_in_batch);
   m_data_set_processed = update_data_set(get_data_reader(mode), mode);
 
   // Kick off background I/O once the forward prop phase is complete.
@@ -290,29 +321,27 @@ bool buffered_data_coordinator<TensorDataType>::update_data_set(generic_data_rea
 }
 
 template <typename TensorDataType>
-void buffered_data_coordinator<TensorDataType>::distribute_from_local_matrix(execution_mode mode, std::map<input_data_type, AbsDistMatrixType*>& input_buffers) {
+void buffered_data_coordinator<TensorDataType>::distribute_from_local_matrix(
+  execution_mode mode,
+  data_field_type const data_field,
+  AbsDistMatrixType& input_buffer)
+{
   prof_region_begin("distribute_from_local_matrix", prof_colors[3], false);
   data_buffer<IODataType>& buf = get_active_buffer(mode);
-  for(auto idt : input_data_type_iterator()) {
-    if(buf.m_input_buffers.count(idt)) {
-      if(input_buffers.count(idt)) {
-        view_or_copy_tensor(*buf.m_input_buffers[idt], *input_buffers[idt]);
-      }
-    }else {
-      if(input_buffers.count(idt)) {
-        LBANN_ERROR("Requested input data of type ", to_string(idt), " - no data in data coordinator");
-      }
-    }
+  if (buf.m_input_buffers.find(data_field) == buf.m_input_buffers.end()) {
+    LBANN_ERROR("Unknown data_field_type value requested: " + data_field);
   }
+  view_or_copy_tensor(*buf.m_input_buffers[data_field], input_buffer);
 #ifdef LBANN_HAS_DISTCONV
-  if (dc::is_cosmoflow_parallel_io_enabled() && input_buffers.count(input_data_type::RESPONSES)) {
-    auto& response = *(input_buffers[input_data_type::RESPONSES]);
-    El::Int new_width = response.Width() / dc::get_number_of_io_partitions();
-    if (response.Viewing()) {
-      El::LockedView(response, response, El::ALL, El::IR(0, new_width));
+  if (dc::is_cosmoflow_parallel_io_enabled() &&
+      data_field == INPUT_DATA_TYPE_RESPONSES) {
+    El::Int new_width =
+      input_buffer.Width() / dc::get_number_of_io_partitions();
+    if (input_buffer.Viewing()) {
+      El::LockedView(input_buffer, input_buffer, El::ALL, El::IR(0, new_width));
     }
     else {
-      response.Resize(response.Height(), new_width);
+      input_buffer.Resize(input_buffer.Height(), new_width);
     }
   }
 #endif
diff --git a/src/data_coordinator/data_coordinator.cpp b/src/data_coordinator/data_coordinator.cpp
index 3dd85c4e573..61251019483 100644
--- a/src/data_coordinator/data_coordinator.cpp
+++ b/src/data_coordinator/data_coordinator.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -36,6 +36,7 @@ template <class Archive>
 void data_coordinator::serialize( Archive & ar ) {
   ar(/*CEREAL_NVP(m_io_buffer),*/
      CEREAL_NVP(m_datasets)/*,
+     CEREAL_NVP(m_active_data_fields),
      CEREAL_NVP(m_data_readers),
      CEREAL_NVP(m_data_set_processed)*/);
 }
diff --git a/src/data_readers/CMakeLists.txt b/src/data_readers/CMakeLists.txt
index 9b6c149a364..91fa504345a 100644
--- a/src/data_readers/CMakeLists.txt
+++ b/src/data_readers/CMakeLists.txt
@@ -38,8 +38,5 @@ if (LBANN_HAS_OPENCV)
     "${CMAKE_CURRENT_SOURCE_DIR}/data_reader_imagenet.cpp")
 endif ()
 
-# Add the subdirectories
-add_subdirectory(utils)
-
 # Propagate the files up the tree
 set(SOURCES "${SOURCES}" "${THIS_DIR_SOURCES}" PARENT_SCOPE)
diff --git a/src/data_readers/data_reader.cpp b/src/data_readers/data_reader.cpp
index 00ce8d0890e..578b1c740fc 100644
--- a/src/data_readers/data_reader.cpp
+++ b/src/data_readers/data_reader.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -87,59 +87,43 @@ void generic_data_reader::setup(int num_io_threads, observer_ptr<thread_pool> io
   m_io_thread_pool = io_thread_pool;
 }
 
-int lbann::generic_data_reader::fetch(std::map<input_data_type, CPUMat*>& input_buffers, El::Matrix<El::Int>& indices_fetched) {
-  // Fetch sample
-  auto buf = input_buffers[input_data_type::SAMPLES];
-  if(buf == nullptr || buf->Height() == 0 || buf->Width() == 0) {
-    LBANN_ERROR("fetch function called with invalid buffer: h=", buf->Height(), " x ", buf->Width());
-  }
-  int num_samples_fetched = fetch_data(*(buf), indices_fetched);
-  // Fetch label is applicable
-  buf = input_buffers[input_data_type::LABELS];
-  if(has_labels() && buf != nullptr && buf->Height() != 0 && buf->Width() != 0) {
-    if(input_buffers[input_data_type::LABELS] == nullptr) {
-      LBANN_ERROR("LABELS is not defined");
-    }
-    int num_labels_fetched = fetch_labels(*(input_buffers[input_data_type::LABELS]));
-    if(num_labels_fetched != num_samples_fetched) {
-      LBANN_ERROR("Number of samples: ",
-                  std::to_string(num_samples_fetched),
-                  " does not match the number of labels: ",
-                  std::to_string(num_labels_fetched));
+int lbann::generic_data_reader::fetch(
+  std::map<data_field_type, CPUMat*>& input_buffers,
+  El::Matrix<El::Int>& indices_fetched)
+{
+  // Check to make sure that a valid map was passed
+  if (input_buffers.empty()) {
+    LBANN_ERROR("fetch function called with no valid buffer");
+  }
+  //  Check that all buffers within the map are valid and hold the
+  //  same number of samples
+  El::Int buffer_width = 0;
+  for (auto& [data_field, buf] : input_buffers) {
+    if (buf == nullptr || buf->Height() == 0 || buf->Width() == 0) {
+      LBANN_ERROR("fetch function called with invalid buffer: h=",
+                  buf->Height(),
+                  " x ",
+                  buf->Width(),
+                  " for data field ",
+                  data_field);
     }
-  }
-  // Fetch response is applicable
-  buf = input_buffers[input_data_type::RESPONSES];
-  if(has_responses() && buf != nullptr && buf->Height() != 0 && buf->Width() != 0) {
-    int num_responses_fetched = fetch_responses(*(input_buffers[input_data_type::RESPONSES]));
-    if(num_responses_fetched != num_samples_fetched) {
-      LBANN_ERROR("Number of samples: ",
-                  std::to_string(num_samples_fetched),
-                  " does not match the number of responses: ",
-                  std::to_string(num_responses_fetched));
+    if (buffer_width == 0) {
+      buffer_width = buf->Width();
     }
-  }
-  return num_samples_fetched;
-}
-
-bool lbann::generic_data_reader::fetch_data_block(CPUMat& X, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix<El::Int>& indices_fetched) {
-  locked_io_rng_ref io_rng = set_io_generators_local_index(block_offset);
-
-  for (int s = block_offset; s < mb_size; s+=block_stride) {
-    int n = m_current_pos + (s * m_sample_stride);
-    int index = m_shuffled_indices[n];
-    bool valid = fetch_datum(X, index, s);
-    if (!valid) {
-      LBANN_ERROR("invalid datum (index ", std::to_string(index), ")");
+    else {
+      if (buffer_width != buf->Width()) {
+        LBANN_ERROR("fetch function called with a set of buffers that have "
+                    "mismatched widths: h=",
+                    buf->Height(),
+                    " x ",
+                    buf->Width(),
+                    " for data field ",
+                    data_field);
+      }
     }
-    indices_fetched.Set(s, 0, index);
   }
 
-  return true;
-}
-
-int lbann::generic_data_reader::fetch_data(CPUMat& X, El::Matrix<El::Int>& indices_fetched) {
-  #ifdef DEBUG
+#ifdef DEBUG
   if (m_current_pos == 0) {
     if (is_master()) {
       std::cout << "role: " << get_role() << " model: " << m_trainer->get_name()
@@ -155,8 +139,10 @@ int lbann::generic_data_reader::fetch_data(CPUMat& X, El::Matrix<El::Int>& indic
   int loaded_batch_size = get_loaded_mini_batch_size();
 
   const int end_pos = std::min(static_cast<size_t>(m_current_pos+loaded_batch_size), m_shuffled_indices.size());
-  const int mb_size = std::min(El::Int{((end_pos - m_current_pos) + m_sample_stride - 1) / m_sample_stride},
-      X.Width());
+  const int mb_size =
+    std::min(El::Int{((end_pos - m_current_pos) + m_sample_stride - 1) /
+                     m_sample_stride},
+             buffer_width);
 
   if(!position_valid()) {
     if(position_is_overrun()) {
@@ -180,21 +166,37 @@ int lbann::generic_data_reader::fetch_data(CPUMat& X, El::Matrix<El::Int>& indic
     set_jag_variables(mb_size);
   }
 
+  // BVE FIXME - for the time being certain data fields, such as the
+  // labels, have to be zeroed out because they will typically only
+  // set the single index corresponding to the categorical value.
+  // With general data fields this will have to be the responsibility
+  // of the concrete data reader.
+  if (has_labels() &&
+      input_buffers.find(INPUT_DATA_TYPE_LABELS) != input_buffers.end()) {
+    auto& buf = input_buffers[INPUT_DATA_TYPE_LABELS];
+    El::Zeros_seq(*buf, buf->Height(), buf->Width());
+  }
+
   // Fetch data is executed by the thread pool so it has to dispatch
   // work to other threads in the thread pool and do some work locally
   for (int t = 0; t < static_cast<int>(m_io_thread_pool->get_num_threads()); t++) {
     // Queue up work into other threads and then finish off the
     // mini-batch in the active thread
-    if(t == m_io_thread_pool->get_local_thread_id()) {
+    if (t == m_io_thread_pool->get_local_thread_id()) {
       continue;
-    }else {
+    }
+    else {
       m_io_thread_pool->submit_job_to_work_group(
-        std::bind(&generic_data_reader::fetch_data_block, this, std::ref(X), t,
+        std::bind(&generic_data_reader::fetch_data_block,
+                  this,
+                  std::ref(input_buffers),
+                  t,
                   m_io_thread_pool->get_num_threads(),
-                  mb_size, std::ref(indices_fetched)));
+                  mb_size,
+                  std::ref(indices_fetched)));
     }
   }
-  fetch_data_block(X,
+  fetch_data_block(input_buffers,
                    m_io_thread_pool->get_local_thread_id(),
                    m_io_thread_pool->get_num_threads(),
                    mb_size,
@@ -212,6 +214,68 @@ int lbann::generic_data_reader::fetch_data(CPUMat& X, El::Matrix<El::Int>& indic
   return mb_size;
 }
 
+bool lbann::generic_data_reader::fetch_data_block(
+  std::map<data_field_type, CPUMat*>& input_buffers,
+  El::Int block_offset,
+  El::Int block_stride,
+  El::Int mb_size,
+  El::Matrix<El::Int>& indices_fetched)
+{
+  // Fetch mini-batch slots block_offset, block_offset + block_stride, ...
+  // (while < mb_size) into each handled buffer of input_buffers, and
+  // store the sample index used for slot s at (s, 0) of indices_fetched.
+  // A null or empty buffer, or a failed fetch, is reported through
+  // LBANN_ERROR; otherwise the function returns true.
+  locked_io_rng_ref io_rng = set_io_generators_local_index(block_offset);
+
+  for (int s = block_offset; s < mb_size; s += block_stride) {
+    const int n = m_current_pos + (s * m_sample_stride);
+    const int index = m_shuffled_indices[n];
+    indices_fetched.Set(s, 0, index);
+
+    for (auto& [data_field, buf] : input_buffers) {
+      // Only the sample field, and the label/response fields when this
+      // reader reports having them, are fetched; other fields are skipped.
+      const bool is_sample = (data_field == INPUT_DATA_TYPE_SAMPLES);
+      const bool is_label =
+        (data_field == INPUT_DATA_TYPE_LABELS) && has_labels();
+      const bool is_response =
+        (data_field == INPUT_DATA_TYPE_RESPONSES) && has_responses();
+      if (!is_sample && !is_label && !is_response) {
+        continue;
+      }
+      // Check for nullptr first so the error path never dereferences it.
+      if (buf == nullptr) {
+        LBANN_ERROR(
+          "fetch_data_block function called with null buffer for field ",
+          data_field);
+      }
+      else if (buf->Height() == 0 || buf->Width() == 0) {
+        LBANN_ERROR(
+          "fetch_data_block function called with invalid buffer: h=",
+          buf->Height(),
+          " x ",
+          buf->Width());
+      }
+      bool valid = false;
+      if (is_sample) {
+        valid = fetch_datum(*buf, index, s);
+      }
+      else if (is_label) {
+        valid = fetch_label(*buf, index, s);
+      }
+      else {
+        valid = fetch_response(*buf, index, s);
+      }
+      if (!valid) {
+        LBANN_ERROR("invalid datum (index ", std::to_string(index), ")");
+      }
+    }
+  }
+
+  return true;
+}
+
 void lbann::generic_data_reader::set_jag_variables(int mb_size) {
   // all min_batches have the same number of indices;
   // this probably causes a few indices to be discarded,
@@ -238,73 +302,6 @@ void lbann::generic_data_reader::set_jag_variables(int mb_size) {
   m_world_master_mini_batch_adjustment = 0;
 }
 
-int lbann::generic_data_reader::fetch_labels(CPUMat& Y) {
-  int loaded_batch_size = get_loaded_mini_batch_size();
-  const int end_pos = std::min(static_cast<size_t>(m_current_pos+loaded_batch_size),
-                               m_shuffled_indices.size());
-  const int mb_size = std::min(
-    El::Int{((end_pos - m_current_pos) + m_sample_stride - 1) / m_sample_stride},
-    Y.Width());
-
-  El::Zeros_seq(Y, Y.Height(), Y.Width());
-
-  if(!position_valid()) {
-    if(position_is_overrun()) {
-      return 0;
-    }else {
-      LBANN_ERROR(std::string{} + "generic data reader load error: !position_valid"
-                  + " -- current pos = " + std::to_string(m_current_pos)
-                  + " and there are " + std::to_string(m_shuffled_indices.size()) + " indices");
-    }
-  }
-
-  std::string error_message;
-  for (int s = 0; s < mb_size; s++) {
-    int n = m_current_pos + (s * m_sample_stride);
-    int index = m_shuffled_indices[n];
-    bool valid = fetch_label(Y, index, s);
-    if (!valid) {
-      error_message = "invalid label (index " + std::to_string(index) + ")";
-    }
-  }
-  if (!error_message.empty()) { LBANN_ERROR(error_message); }
-
-  return mb_size;
-}
-
-int lbann::generic_data_reader::fetch_responses(CPUMat& Y) {
-  int loaded_batch_size = get_loaded_mini_batch_size();
-  const int end_pos = std::min(static_cast<size_t>(m_current_pos+loaded_batch_size),
-                               m_shuffled_indices.size());
-  const int mb_size = std::min(
-    El::Int{((end_pos - m_current_pos) + m_sample_stride - 1) / m_sample_stride},
-    Y.Width());
-
-  El::Zeros_seq(Y, Y.Height(), Y.Width());
-
-  if(!position_valid()) {
-    if(position_is_overrun()) {
-      return 0;
-    }else {
-      LBANN_ERROR(std::string{} + "generic data reader load error: !position_valid"
-                  + " -- current pos = " + std::to_string(m_current_pos)
-                  + " and there are " + std::to_string(m_shuffled_indices.size()) + " indices");
-    }
-  }
-
-  std::string error_message;
-  for (int s = 0; s < mb_size; s++) {
-    int n = m_current_pos + (s * m_sample_stride);
-    int index = m_shuffled_indices[n];
-    bool valid = fetch_response(Y, index, s);
-    if (!valid) {
-      error_message = "invalid response (index " + std::to_string(index) + ")";
-    }
-  }
-  if (!error_message.empty()) { LBANN_ERROR(error_message); }
-  return mb_size;
-}
-
 bool generic_data_reader::update(bool is_active_reader) {
   bool reader_not_done = true; // BVE The sense of this should be fixed
   m_current_mini_batch_idx++;
@@ -686,10 +683,10 @@ double generic_data_reader::get_use_percent() const {
 void generic_data_reader::instantiate_data_store() {
   double tm1 = get_time();
   auto& arg_parser = global_argument_parser();
-  if (! (arg_parser.get<bool>(USE_DATA_STORE) ||
-         arg_parser.get<bool>(PRELOAD_DATA_STORE) ||
-         arg_parser.get<bool>(DATA_STORE_CACHE) ||
-         arg_parser.get<std::string>(DATA_STORE_SPILL) != "")) {
+  if (!(arg_parser.get<bool>(USE_DATA_STORE) ||
+        arg_parser.get<bool>(PRELOAD_DATA_STORE) ||
+        arg_parser.get<bool>(DATA_STORE_CACHE) ||
+        arg_parser.get<std::string>(DATA_STORE_SPILL) != "")) {
     if (m_data_store != nullptr) {
       delete m_data_store;
       m_data_store = nullptr;
@@ -777,8 +774,8 @@ void generic_data_reader::set_mini_batch_size(const int s) {
 
 void generic_data_reader::set_role(std::string role) {
   m_role = role;
-  if (global_argument_parser().get<bool>(JAG_PARTITIONED)
-      && get_role() == "train") {
+  if (global_argument_parser().get<bool>(JAG_PARTITIONED) &&
+      get_role() == "train") {
     m_jag_partitioned = true;
     if (is_master()) {
       std::cout << "USING JAG DATA PARTITIONING\n";
diff --git a/src/data_readers/data_reader_HDF5.cpp b/src/data_readers/data_reader_HDF5.cpp
index 481044f1a86..d541d016b94 100644
--- a/src/data_readers/data_reader_HDF5.cpp
+++ b/src/data_readers/data_reader_HDF5.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -223,7 +223,7 @@ void hdf5_data_reader::load()
   // May go away; for now, this reader only supports preloading mode
   // with data store
   // TODO MRW
-  //opts->set_option("preload_data_store", true);
+  // opts->set_option("preload_data_store", true);
 
   // Load the sample list(s)
   data_reader_sample_list::load();
@@ -897,14 +897,14 @@ void hdf5_data_reader::construct_linearized_size_lookup_tables()
   }
 }
 
-bool hdf5_data_reader::fetch(std::string which,
+bool hdf5_data_reader::fetch(data_field_type data_field,
                              CPUMat& Y,
                              int data_id,
                              int mb_idx)
 {
   size_t n_elts = 0;
   std::string dtype;
-  const void* d = get_data(data_id, which, n_elts, dtype);
+  const void* d = get_data(data_id, data_field, n_elts, dtype);
 
   if (dtype == "float64") {
     const conduit::float64* data = reinterpret_cast<const conduit::float64*>(d);
@@ -1027,7 +1027,7 @@ void hdf5_data_reader::set_experiment_schema(const conduit::Node& s)
 // Note to developers and reviewer: this is very conduit-ishy; I keep thinking
 // there's a simpler, more elegant way to do this, but I'm not seeing it.
 const void* hdf5_data_reader::get_data(const size_t sample_id_in,
-                                       std::string field_name_in,
+                                       data_field_type data_field,
                                        size_t& num_elts_out,
                                        std::string& dtype_out) const
 {
@@ -1035,7 +1035,7 @@ const void* hdf5_data_reader::get_data(const size_t sample_id_in,
   // get the pathname to the data, and verify it exists in the conduit::Node
   const conduit::Node& node = m_data_store->get_conduit_node(sample_id_in);
   std::ostringstream ss;
-  ss << node.name() << node.child(0).name() + "/" << field_name_in;
+  ss << node.name() << node.child(0).name() + "/" << data_field;
   if (!node.has_path(ss.str())) {
     LBANN_ERROR("no path: ", ss.str());
   }
diff --git a/src/data_readers/data_reader_cifar10.cpp b/src/data_readers/data_reader_cifar10.cpp
index a0489dbdaab..99d3568c017 100644
--- a/src/data_readers/data_reader_cifar10.cpp
+++ b/src/data_readers/data_reader_cifar10.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -44,7 +44,7 @@ void cifar10_reader::set_defaults() {
   m_image_num_channels = 3;
   set_linearized_image_size();
   m_num_labels = 10;
-  m_supported_input_types[input_data_type::LABELS] = true;
+  m_supported_input_types[INPUT_DATA_TYPE_LABELS] = true;
 }
 
 void cifar10_reader::load() {
diff --git a/src/data_readers/data_reader_csv.cpp b/src/data_readers/data_reader_csv.cpp
index 44ebf9a781a..f5a59108fc2 100644
--- a/src/data_readers/data_reader_csv.cpp
+++ b/src/data_readers/data_reader_csv.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -38,9 +38,9 @@ namespace lbann {
 csv_reader::csv_reader(bool shuffle)
   : generic_data_reader(shuffle) {
   // By default assume that there are labels in the CSV data set
-  m_supported_input_types[input_data_type::LABELS] = true;
+  m_supported_input_types[INPUT_DATA_TYPE_LABELS] = true;
   // By default assume that there are not responses in the CSV data set
-  m_supported_input_types[input_data_type::RESPONSES] = false;
+  m_supported_input_types[INPUT_DATA_TYPE_RESPONSES] = false;
 }
 
 csv_reader::csv_reader(const csv_reader& other) :
diff --git a/src/data_readers/data_reader_hdf5_legacy.cpp b/src/data_readers/data_reader_hdf5_legacy.cpp
index 85785a0adda..0f76cf3097a 100644
--- a/src/data_readers/data_reader_hdf5_legacy.cpp
+++ b/src/data_readers/data_reader_hdf5_legacy.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -55,17 +55,17 @@ namespace lbann {
 
 template <typename TensorDataType>
 hdf5_reader<TensorDataType>::hdf5_reader(const bool shuffle,
-                         const std::string key_data,
-                         const std::string key_labels,
-                         const std::string key_responses,
-                         const bool hyperslab_labels)
-    : generic_data_reader(shuffle),
-      m_use_data_store(global_argument_parser().get<bool>(USE_DATA_STORE)),
-      m_key_data(key_data),
-      m_key_labels(key_labels),
-      m_key_responses(key_responses),
-      m_hyperslab_labels(hyperslab_labels) {
-}
+                                         const std::string key_data,
+                                         const std::string key_labels,
+                                         const std::string key_responses,
+                                         const bool hyperslab_labels)
+  : generic_data_reader(shuffle),
+    m_use_data_store(global_argument_parser().get<bool>(USE_DATA_STORE)),
+    m_key_data(key_data),
+    m_key_labels(key_labels),
+    m_key_responses(key_responses),
+    m_hyperslab_labels(hyperslab_labels)
+{}
 
 template <typename TensorDataType>
 hdf5_reader<TensorDataType>::hdf5_reader(const hdf5_reader& rhs)  : generic_data_reader(rhs) {
diff --git a/src/data_readers/data_reader_image.cpp b/src/data_readers/data_reader_image.cpp
index d650026b6dd..d4cd515b667 100644
--- a/src/data_readers/data_reader_image.cpp
+++ b/src/data_readers/data_reader_image.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -178,10 +178,11 @@ void image_data_reader::load() {
     }
     m_sample_list.write(s.str());
   }
-  if (arg_parser.get<bool>("write_sample_label_list") && m_comm->am_trainer_master()) {
+  if (arg_parser.get<bool>("write_sample_label_list") &&
+      m_comm->am_trainer_master()) {
     if (!(m_keep_sample_order || arg_parser.get<bool>(KEEP_SAMPLE_ORDER))) {
-    std::cout << "Writting sample label list without the option "
-              << "`keep_sample_order' set." << std::endl;
+      std::cout << "Writting sample label list without the option "
+                << "`keep_sample_order' set." << std::endl;
     }
     std::string dump_file = "image_list.trainer"
                           + std::to_string(m_comm->get_trainer_rank())
@@ -231,12 +232,13 @@ void image_data_reader::do_preload_data_store() {
 
   int rank = m_comm->get_rank_in_trainer();
 
-  bool threaded = ! arg_parser.get<bool>(DATA_STORE_NO_THREAD);
+  bool threaded = !arg_parser.get<bool>(DATA_STORE_NO_THREAD);
   if (threaded) {
     if (is_master()) {
       std::cout << "mode: data_store_thread\n";
     }
-    std::shared_ptr<thread_pool> io_thread_pool = construct_io_thread_pool(m_comm, false);
+    std::shared_ptr<thread_pool> io_thread_pool =
+      construct_io_thread_pool(m_comm, false);
     int num_threads = static_cast<int>(io_thread_pool->get_num_threads());
 
     std::vector<std::unordered_set<int>> data_ids(num_threads);
@@ -333,7 +335,8 @@ void image_data_reader::load_list_of_samples(const std::string sample_list_file)
 
   if (m_keep_sample_order || arg_parser.get<bool>(KEEP_SAMPLE_ORDER)) {
     m_sample_list.keep_sample_order(true);
-  } else {
+  }
+  else {
     m_sample_list.keep_sample_order(false);
   }
 
@@ -354,7 +357,8 @@ void image_data_reader::load_list_of_samples(const std::string sample_list_file)
 
     m_sample_list.set_sample_list_name(sample_list_file);
     m_sample_list.load(iss, *m_comm, true);
-  } else {
+  }
+  else {
     m_sample_list.load(sample_list_file, *m_comm, true);
   }
 
@@ -424,7 +428,8 @@ void image_data_reader::gen_list_of_samples() {
 
   if (m_keep_sample_order || arg_parser.get<bool>(KEEP_SAMPLE_ORDER)) {
     m_sample_list.keep_sample_order(true);
-  } else {
+  }
+  else {
     m_sample_list.keep_sample_order(false);
   }
 
@@ -457,7 +462,8 @@ void image_data_reader::gen_list_of_samples() {
     vectorwrapbuf<char> strmbuf(buffer);
     std::istream iss(&strmbuf);
     m_sample_list.load(header, iss, *m_comm, true);
-  } else {
+  }
+  else {
     // The trainer master counts the number of samples (lines) and broadcasts
     // the result
     size_t num_samples = 0ul;
diff --git a/src/data_readers/data_reader_imagenet.cpp b/src/data_readers/data_reader_imagenet.cpp
index 5e2e81fa47c..fa926a6e8ba 100644
--- a/src/data_readers/data_reader_imagenet.cpp
+++ b/src/data_readers/data_reader_imagenet.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -46,7 +46,7 @@ void imagenet_reader::set_defaults() {
   m_image_num_channels = 3;
   set_linearized_image_size();
   m_num_labels = 1000;
-  m_supported_input_types[input_data_type::LABELS] = true;
+  m_supported_input_types[INPUT_DATA_TYPE_LABELS] = true;
 }
 
 CPUMat imagenet_reader::create_datum_view(CPUMat& X, const int mb_idx) const {
diff --git a/src/data_readers/data_reader_jag_conduit.cpp b/src/data_readers/data_reader_jag_conduit.cpp
index 44e0258dec9..5ddd44da5b7 100644
--- a/src/data_readers/data_reader_jag_conduit.cpp
+++ b/src/data_readers/data_reader_jag_conduit.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -229,8 +229,8 @@ void data_reader_jag_conduit::set_defaults() {
   m_list_per_trainer = false;
   m_list_per_model = false;
 
-  m_supported_input_types[input_data_type::LABELS] = true;
-  m_supported_input_types[input_data_type::RESPONSES] = true;
+  m_supported_input_types[INPUT_DATA_TYPE_LABELS] = true;
+  m_supported_input_types[INPUT_DATA_TYPE_RESPONSES] = true;
 }
 
 void data_reader_jag_conduit::setup(int num_io_threads, observer_ptr<thread_pool> io_thread_pool) {
@@ -884,7 +884,8 @@ void data_reader_jag_conduit::load_list_of_samples(const std::string sample_list
 
   if (this->m_keep_sample_order || arg_parser.get<bool>(KEEP_SAMPLE_ORDER)) {
     m_sample_list.keep_sample_order(true);
-  } else {
+  }
+  else {
     m_sample_list.keep_sample_order(false);
   }
 
@@ -907,7 +908,8 @@ void data_reader_jag_conduit::load_list_of_samples(const std::string sample_list
 
     m_sample_list.set_sample_list_name(sample_list_file);
     m_sample_list.load(iss, *(this->m_comm), true);
-  } else {
+  }
+  else {
     m_sample_list.load(sample_list_file, *(this->m_comm), true);
   }
 
@@ -1423,41 +1425,14 @@ bool data_reader_jag_conduit::fetch(CPUMat& X, int data_id, conduit::Node& sampl
   return true;
 }
 
-int data_reader_jag_conduit::reuse_data(CPUMat& X) {
-  El::Copy(m_data_cache, X);
-  return m_cached_data_mb_size;
-}
 
-int data_reader_jag_conduit::reuse_responses(CPUMat& Y) {
-  El::Copy(m_response_cache, Y);
-  return m_cached_response_mb_size;
-}
 
-int data_reader_jag_conduit::reuse_labels(CPUMat& Y) {
-  El::Copy(m_label_cache, Y);
-  return m_cached_label_mb_size;
-}
 
-int data_reader_jag_conduit::fetch_data(CPUMat& X, El::Matrix<El::Int>& indices_fetched) {
-  m_cached_data_mb_size = generic_data_reader::fetch_data(X, indices_fetched);
-  El::Copy(X, m_data_cache);
 
-  return m_cached_data_mb_size;
-}
 
-int data_reader_jag_conduit::fetch_responses(CPUMat& Y) {
-  m_cached_response_mb_size = generic_data_reader::fetch_responses(Y);
-  El::Copy(Y, m_response_cache);
 
-  return m_cached_response_mb_size;
-}
 
-int data_reader_jag_conduit::fetch_labels(CPUMat& Y) {
-  m_cached_label_mb_size = generic_data_reader::fetch_labels(Y);
-  El::Copy(Y, m_label_cache);
 
-  return m_cached_label_mb_size;
-}
 
 
 bool data_reader_jag_conduit::fetch_datum(CPUMat& X, int data_id, int mb_idx) {
diff --git a/src/data_readers/data_reader_merge_samples.cpp b/src/data_readers/data_reader_merge_samples.cpp
index 35ae783164a..9eb59726b4e 100644
--- a/src/data_readers/data_reader_merge_samples.cpp
+++ b/src/data_readers/data_reader_merge_samples.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Semy_num_readersity, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -35,7 +35,7 @@ data_reader_merge_samples::data_reader_merge_samples(
   std::vector<generic_data_reader*> data_readers,
   bool shuffle) :
   generic_compound_data_reader(data_readers, shuffle) {
-  m_supported_input_types[input_data_type::RESPONSES] = true;
+  m_supported_input_types[INPUT_DATA_TYPE_RESPONSES] = true;
 }
 
 data_reader_merge_samples::data_reader_merge_samples(
diff --git a/src/data_readers/data_reader_mesh.cpp b/src/data_readers/data_reader_mesh.cpp
index 56f63f0d28f..9b10130638b 100644
--- a/src/data_readers/data_reader_mesh.cpp
+++ b/src/data_readers/data_reader_mesh.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -34,7 +34,7 @@ namespace lbann {
 
 mesh_reader::mesh_reader(bool shuffle)
   : generic_data_reader(shuffle) {
-  m_supported_input_types[input_data_type::RESPONSES] = true;
+  m_supported_input_types[INPUT_DATA_TYPE_RESPONSES] = true;
 }
 
 void mesh_reader::load() {
diff --git a/src/data_readers/data_reader_mnist.cpp b/src/data_readers/data_reader_mnist.cpp
index 6f831fff448..972f1f07a08 100644
--- a/src/data_readers/data_reader_mnist.cpp
+++ b/src/data_readers/data_reader_mnist.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -47,7 +47,7 @@ void mnist_reader::set_defaults() {
   m_image_num_channels = 1;
   set_linearized_image_size();
   m_num_labels = 10;
-  m_supported_input_types[input_data_type::LABELS] = true;
+  m_supported_input_types[INPUT_DATA_TYPE_LABELS] = true;
 }
 
 bool mnist_reader::fetch_datum(CPUMat& X, int data_id, int mb_idx) {
diff --git a/src/data_readers/data_reader_npz_ras_lipid.cpp b/src/data_readers/data_reader_npz_ras_lipid.cpp
index 07681523aad..a578976b215 100644
--- a/src/data_readers/data_reader_npz_ras_lipid.cpp
+++ b/src/data_readers/data_reader_npz_ras_lipid.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -84,7 +84,7 @@ void ras_lipid_conduit_data_reader::load() {
 
   auto& arg_parser = global_argument_parser();
   // TODO MRW
-  //opts->set_option(PRELOAD_DATA_STORE, 1);
+  // opts->set_option(PRELOAD_DATA_STORE, 1);
 
   // Error check some settings
   size_t count = get_absolute_sample_count();
@@ -105,7 +105,8 @@ void ras_lipid_conduit_data_reader::load() {
   // of samples before we sequentially-concatenate them)
   if (arg_parser.get<std::string>("pilot2_read_file_sizes") != "") {
     read_file_sizes();
-  } else {
+  }
+  else {
     double tm3 = get_time();
     get_samples_per_file();
     if (is_master()) std::cout << "time to compute samples_per_file: " << get_time() - tm3 << std::endl;
@@ -442,7 +443,8 @@ void ras_lipid_conduit_data_reader::write_file_sizes() {
   if (! is_master()) {
     return;
   }
-  std::string fn = global_argument_parser().get<std::string>(PILOT2_SAVE_FILE_SIZES);
+  std::string fn =
+    global_argument_parser().get<std::string>(PILOT2_SAVE_FILE_SIZES);
   std::ofstream out(fn.c_str());
   if (!out) {
     LBANN_ERROR("failed to open ", fn, " for writing");
@@ -454,7 +456,8 @@ void ras_lipid_conduit_data_reader::write_file_sizes() {
 }
 
 void ras_lipid_conduit_data_reader::read_file_sizes() {
-  std::string fn = global_argument_parser().get<std::string>(PILOT2_READ_FILE_SIZES);
+  std::string fn =
+    global_argument_parser().get<std::string>(PILOT2_READ_FILE_SIZES);
   std::ifstream in(fn.c_str());
   if (!in) {
     LBANN_ERROR("failed to open ", fn, " for reading");
@@ -481,7 +484,7 @@ void ras_lipid_conduit_data_reader::read_normalization_data() {
   m_use_z_score = false;
   auto& arg_parser = global_argument_parser();
   if (arg_parser.get<std::string>(NORMALIZATION) != "") {
-   m_use_min_max = true;
+    m_use_min_max = true;
     m_use_z_score = arg_parser.get<bool>(Z_SCORE);
     if (is_master()) {
       if (m_use_z_score) {
@@ -513,7 +516,8 @@ void ras_lipid_conduit_data_reader::read_normalization_data() {
     if (m_min.size() != 14) {
       LBANN_ERROR("normalization.size() = ", m_min.size(), "; should be 14");
     }
-  } else {
+  }
+  else {
     if (is_master()) {
       std::cout << "NOT Normalizing data!" << std::endl;
     }
diff --git a/src/data_readers/data_reader_numpy.cpp b/src/data_readers/data_reader_numpy.cpp
index a1a9741483d..761fbc5527d 100644
--- a/src/data_readers/data_reader_numpy.cpp
+++ b/src/data_readers/data_reader_numpy.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -83,12 +83,13 @@ void numpy_reader::load() {
       "numpy_reader: fortran order not supported");
   }
   // Don't currently support both labels and responses.
-  if (m_supported_input_types[input_data_type::LABELS] && m_supported_input_types[input_data_type::RESPONSES]) {
+  if (m_supported_input_types[INPUT_DATA_TYPE_LABELS] &&
+      m_supported_input_types[INPUT_DATA_TYPE_RESPONSES]) {
     throw lbann_exception(
       "numpy_reader: labels and responses not supported at same time");
   }
 
-  if (m_supported_input_types[input_data_type::LABELS]) {
+  if (m_supported_input_types[INPUT_DATA_TYPE_LABELS]) {
     // Shift feature count because the last becomes the label.
     m_num_features -= 1;
     // Determine number of label classes.
@@ -114,7 +115,7 @@ void numpy_reader::load() {
     }
     m_num_labels = label_classes.size();
   }
-  if (m_supported_input_types[input_data_type::RESPONSES]) {
+  if (m_supported_input_types[INPUT_DATA_TYPE_RESPONSES]) {
     // Last feature becomes the response.
     m_num_features -= 1;
   }
@@ -129,7 +130,8 @@ void numpy_reader::load() {
 
 bool numpy_reader::fetch_datum(Mat& X, int data_id, int mb_idx) {
   int features_size = m_num_features;
-  if (m_supported_input_types[input_data_type::LABELS] || m_supported_input_types[input_data_type::RESPONSES]) {
+  if (m_supported_input_types[INPUT_DATA_TYPE_LABELS] ||
+      m_supported_input_types[INPUT_DATA_TYPE_RESPONSES]) {
     features_size += 1;
   }
   if (m_data.word_size == 4) {
@@ -147,7 +149,7 @@ bool numpy_reader::fetch_datum(Mat& X, int data_id, int mb_idx) {
 }
 
 bool numpy_reader::fetch_label(Mat& Y, int data_id, int mb_idx) {
-  if (!m_supported_input_types[input_data_type::LABELS]) {
+  if (!m_supported_input_types[INPUT_DATA_TYPE_LABELS]) {
     throw lbann_exception("numpy_reader: do not have labels");
   }
   int label = 0;
@@ -163,7 +165,7 @@ bool numpy_reader::fetch_label(Mat& Y, int data_id, int mb_idx) {
 }
 
 bool numpy_reader::fetch_response(Mat& Y, int data_id, int mb_idx) {
-  if (!m_supported_input_types[input_data_type::RESPONSES]) {
+  if (!m_supported_input_types[INPUT_DATA_TYPE_RESPONSES]) {
     throw lbann_exception("numpy_reader: do not have responses");
   }
   auto response = DataType(0);
diff --git a/src/data_readers/data_reader_numpy_npz.cpp b/src/data_readers/data_reader_numpy_npz.cpp
index dcfec2f1eaa..e60b082228f 100644
--- a/src/data_readers/data_reader_numpy_npz.cpp
+++ b/src/data_readers/data_reader_numpy_npz.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,7 +23,8 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 //
-// data_reader_numpy_npz .hpp .cpp - generic_data_reader class for numpy .npz dataset
+// data_reader_numpy_npz .hpp .cpp - generic_data_reader class for numpy .npz
+// dataset
 ////////////////////////////////////////////////////////////////////////////////
 
 #include "lbann/data_readers/data_reader_numpy_npz.hpp"
@@ -81,8 +82,14 @@ namespace lbann {
 
     std::vector<std::tuple<const bool, const std::string, cnpy::NpyArray &> > npyLoadList;
     npyLoadList.push_back(std::forward_as_tuple(true,            NPZ_KEY_DATA,      m_data));
-    npyLoadList.push_back(std::forward_as_tuple(m_supported_input_types[input_data_type::LABELS],    NPZ_KEY_LABELS,    m_labels));
-    npyLoadList.push_back(std::forward_as_tuple(m_supported_input_types[input_data_type::RESPONSES], NPZ_KEY_RESPONSES, m_responses));
+    npyLoadList.push_back(
+      std::forward_as_tuple(m_supported_input_types[INPUT_DATA_TYPE_LABELS],
+                            NPZ_KEY_LABELS,
+                            m_labels));
+    npyLoadList.push_back(
+      std::forward_as_tuple(m_supported_input_types[INPUT_DATA_TYPE_RESPONSES],
+                            NPZ_KEY_RESPONSES,
+                            m_responses));
     for(const auto& npyLoad : npyLoadList) {
       // Check whether the tensor have to be loaded.
       if(!std::get<0>(npyLoad)) {
@@ -114,7 +121,7 @@ namespace lbann {
                                      m_data.shape.end(),
                                      (unsigned) 1,
                                      std::multiplies<unsigned>());
-    if(m_supported_input_types[input_data_type::RESPONSES]) {
+    if (m_supported_input_types[INPUT_DATA_TYPE_RESPONSES]) {
       m_num_response_features = std::accumulate(m_responses.shape.begin() + 1,
                                                 m_responses.shape.end(),
                                                 (unsigned) 1,
@@ -127,7 +134,7 @@ namespace lbann {
                             " not supported");
     }
 
-    if (m_supported_input_types[input_data_type::LABELS]) {
+    if (m_supported_input_types[INPUT_DATA_TYPE_LABELS]) {
       // Determine number of label classes.
       std::unordered_set<int> label_classes;
       if (m_labels.word_size != 4) {
@@ -183,7 +190,7 @@ namespace lbann {
   }
 
   bool numpy_npz_reader::fetch_label(Mat& Y, int data_id, int mb_idx) {
-    if (!m_supported_input_types[input_data_type::LABELS]) {
+    if (!m_supported_input_types[INPUT_DATA_TYPE_LABELS]) {
       throw lbann_exception("numpy_npz_reader: do not have labels");
     }
     const int label = m_labels.data<int>()[data_id];
@@ -192,7 +199,7 @@ namespace lbann {
   }
 
   bool numpy_npz_reader::fetch_response(Mat& Y, int data_id, int mb_idx) {
-    if (!m_supported_input_types[input_data_type::RESPONSES]) {
+    if (!m_supported_input_types[INPUT_DATA_TYPE_RESPONSES]) {
       throw lbann_exception("numpy_npz_reader: do not have responses");
     }
 
diff --git a/src/data_readers/data_reader_numpy_npz_conduit.cpp b/src/data_readers/data_reader_numpy_npz_conduit.cpp
index 0d1e162b303..5cdcc446bb3 100644
--- a/src/data_readers/data_reader_numpy_npz_conduit.cpp
+++ b/src/data_readers/data_reader_numpy_npz_conduit.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -79,7 +79,8 @@ void numpy_npz_conduit_reader::load() {
 
   auto& arg_parser = global_argument_parser();
 
-  if (! (arg_parser.get<bool>(PRELOAD_DATA_STORE) || arg_parser.get<bool>(USE_DATA_STORE))) {
+  if (!(arg_parser.get<bool>(PRELOAD_DATA_STORE) ||
+        arg_parser.get<bool>(USE_DATA_STORE))) {
     LBANN_ERROR("numpy_npz_conduit_reader requires data_store; please pass either --use_data_store or --preload_data_store on the cmd line");
   }
 
@@ -103,7 +104,8 @@ void numpy_npz_conduit_reader::load() {
   resize_shuffled_indices();
   m_num_samples = m_shuffled_indices.size();
 
-  if (m_num_labels == 0 && !arg_parser.get<bool>(PRELOAD_DATA_STORE) && arg_parser.get<bool>(USE_DATA_STORE)) {
+  if (m_num_labels == 0 && !arg_parser.get<bool>(PRELOAD_DATA_STORE) &&
+      arg_parser.get<bool>(USE_DATA_STORE)) {
     LBANN_WARNING("when not preloading you must specify the number of labels in the prototext file if you are doing classification");
   }
 
@@ -127,14 +129,15 @@ void numpy_npz_conduit_reader::do_preload_data_store() {
 
   std::unordered_set<int> label_classes;
 
-  bool threaded = ! global_argument_parser().get<bool>(DATA_STORE_NO_THREAD);
+  bool threaded = !global_argument_parser().get<bool>(DATA_STORE_NO_THREAD);
 
   //threaded mode
   if (threaded) {
     if (is_master()) {
       std::cout << "mode: data_store_thread\n";
     }
-    std::shared_ptr<thread_pool> io_thread_pool = construct_io_thread_pool(m_comm, false);
+    std::shared_ptr<thread_pool> io_thread_pool =
+      construct_io_thread_pool(m_comm, false);
     int num_threads = static_cast<int>(io_thread_pool->get_num_threads());
 
     //collect the set of indices that belong to this rank
@@ -182,7 +185,7 @@ void numpy_npz_conduit_reader::do_preload_data_store() {
   // Nikoli says we're not using labels, so I'm commenting this section out
   // (this section is a mess, anyway)
   #if 0
-  if (m_supported_input_types[input_data_type::LABELS]) {
+  if (m_supported_input_types[INPUT_DATA_TYPE_LABELS]) {
 
     // get max element. Yes, I know you can do this with, e.g, lambda
     // expressions and c++11 and etc, etc. But that's just B-ugly and
@@ -301,7 +304,7 @@ bool numpy_npz_conduit_reader::fetch_datum(Mat& X, int data_id, int mb_idx) {
 }
 
 bool numpy_npz_conduit_reader::fetch_label(Mat& Y, int data_id, int mb_idx) {
-  if (!m_supported_input_types[input_data_type::LABELS]) {
+  if (!m_supported_input_types[INPUT_DATA_TYPE_LABELS]) {
     LBANN_ERROR("numpy_npz_conduit_reader: do not have labels");
   }
   if (m_num_labels == 0) {
@@ -318,7 +321,7 @@ bool numpy_npz_conduit_reader::fetch_label(Mat& Y, int data_id, int mb_idx) {
 }
 
 bool numpy_npz_conduit_reader::fetch_response(Mat& Y, int data_id, int mb_idx) {
-  if (!m_supported_input_types[input_data_type::RESPONSES]) {
+  if (!m_supported_input_types[INPUT_DATA_TYPE_RESPONSES]) {
     LBANN_ERROR("numpy_npz_conduit_reader: do not have responses");
   }
 
@@ -405,14 +408,14 @@ void numpy_npz_conduit_reader::fill_in_metadata() {
     std::cout << "data word size: " << m_data_word_size << "\n";
   }
 
-  if (m_supported_input_types[input_data_type::LABELS]) {
+  if (m_supported_input_types[INPUT_DATA_TYPE_LABELS]) {
     word_size = node[LBANN_DATA_ID_STR(data_id) + "/frm/word_size"].value();
     if (word_size != 4) {
       LBANN_ERROR("numpy_npz_conduit_reader: label should be in int32, but word_size= " + std::to_string(word_size));
     }
   }
 
-  if (m_supported_input_types[input_data_type::RESPONSES]) {
+  if (m_supported_input_types[INPUT_DATA_TYPE_RESPONSES]) {
     m_response_word_size = node[LBANN_DATA_ID_STR(data_id) + "/responses/word_size"].value();
     auto r_shape = node[LBANN_DATA_ID_STR(data_id) + "/responses/shape"].as_uint64_array();
     int n = r_shape.number_of_elements();
diff --git a/src/data_readers/data_reader_python.cpp b/src/data_readers/data_reader_python.cpp
index bef4634177b..1b4788db99b 100644
--- a/src/data_readers/data_reader_python.cpp
+++ b/src/data_readers/data_reader_python.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -105,12 +105,15 @@ int python_reader::get_linearized_label_size() const {
   return get_num_labels();
 }
 
-bool python_reader::fetch_data_block(CPUMat& X,
-                                     El::Int block_offset,
-                                     El::Int block_stride,
-                                     El::Int mb_size,
-                                     El::Matrix<El::Int>& indices_fetched) {
+bool python_reader::fetch_data_block(
+  std::map<data_field_type, CPUMat*>& input_buffers,
+  El::Int block_offset,
+  El::Int block_stride,
+  El::Int mb_size,
+  El::Matrix<El::Int>& indices_fetched)
+{
 
+  CPUMat& X = *(input_buffers[INPUT_DATA_TYPE_SAMPLES]);
   // Acquire Python GIL on first IO thread
   // Note: Do nothing on other IO threads.
   if (block_offset != 0) { return true; }
diff --git a/src/data_readers/data_reader_smiles.cpp b/src/data_readers/data_reader_smiles.cpp
index 9c07dab8a7a..85497bbda08 100644
--- a/src/data_readers/data_reader_smiles.cpp
+++ b/src/data_readers/data_reader_smiles.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -122,7 +122,7 @@ void smiles_data_reader::load() {
     if (arg_parser.get<int>(SEQUENCE_LENGTH) == -1) {
       LBANN_ERROR("you must pass --sequence_length=<int> on the cmd line or call set_sequence_length()");
     }
-    m_sequence_length =  arg_parser.get<int>(SEQUENCE_LENGTH);
+    m_sequence_length = arg_parser.get<int>(SEQUENCE_LENGTH);
   }
   m_linearized_data_size = m_sequence_length+2;
 
diff --git a/src/data_readers/data_reader_synthetic.cpp b/src/data_readers/data_reader_synthetic.cpp
index 012db31307a..03dcb5cf1d4 100644
--- a/src/data_readers/data_reader_synthetic.cpp
+++ b/src/data_readers/data_reader_synthetic.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -23,7 +23,8 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.
 //
-// lbann_data_reader_synthetic .hpp .cpp - generic_data_reader class for synthetic (unit testing) data
+// lbann_data_reader_synthetic .hpp .cpp - generic_data_reader class for
+// synthetic (unit testing) data
 ////////////////////////////////////////////////////////////////////////////////
 
 #include "lbann/data_readers/data_reader_synthetic.hpp"
@@ -53,16 +54,28 @@ data_reader_synthetic::data_reader_synthetic(int num_samples, int num_features,
 
 data_reader_synthetic::data_reader_synthetic(int num_samples,
                                              std::vector<int> dims,
-                                             int num_labels, bool shuffle)
-  : generic_data_reader(shuffle), m_num_samples(num_samples),
-    m_num_labels(num_labels), m_dimensions(dims) {}
+                                             int num_labels,
+                                             bool shuffle)
+  : generic_data_reader(shuffle),
+    m_num_samples(num_samples),
+    m_num_labels(num_labels),
+    m_dimensions(dims)
+{
+  set_has_labels(true);
+}
 
 data_reader_synthetic::data_reader_synthetic(int num_samples,
                                              std::vector<int> dims,
                                              std::vector<int> response_dims,
                                              bool shuffle)
-  : generic_data_reader(shuffle), m_num_samples(num_samples),
-    m_num_labels(0), m_dimensions(dims), m_response_dimensions(response_dims) {}
+  : generic_data_reader(shuffle),
+    m_num_samples(num_samples),
+    m_num_labels(0),
+    m_dimensions(dims),
+    m_response_dimensions(response_dims)
+{
+  set_has_responses(true);
+}
 
 bool data_reader_synthetic::fetch_datum(CPUMat& X, int data_id, int mb_idx) {
   auto X_v = El::View(X, El::ALL, El::IR(mb_idx, mb_idx + 1));
@@ -74,8 +87,8 @@ bool data_reader_synthetic::fetch_label(CPUMat& Y, int data_id, int mb_idx) {
   if (m_num_labels == 0) {
     LBANN_ERROR("Synthetic data reader does not have labels");
   }
-  auto io_rng = set_io_generators_local_index(0);
-  Y.Set(fast_rand_int(get_fast_io_generator(), m_num_labels), mb_idx, 1);
+  auto index = fast_rand_int(get_fast_io_generator(), m_num_labels);
+  Y.Set(index, mb_idx, 1);
   return true;
 }
 
diff --git a/src/data_readers/unit_test/CMakeLists.txt b/src/data_readers/unit_test/CMakeLists.txt
index 09644deeb9d..4221e14e068 100644
--- a/src/data_readers/unit_test/CMakeLists.txt
+++ b/src/data_readers/unit_test/CMakeLists.txt
@@ -1,6 +1,7 @@
 set_full_path(THIS_DIR_SEQ_CATCH2_TEST_FILES
   data_reader_smiles_test.cpp
   data_reader_HDF5_hrrl_data_test.cpp
+  data_reader_synthetic_test.cpp
   )
 
 set_full_path(THIS_DIR_MPI_CATCH2_TEST_FILES
@@ -8,6 +9,7 @@ set_full_path(THIS_DIR_MPI_CATCH2_TEST_FILES
   data_reader_smiles_sample_list_test.cpp
   data_reader_HDF5_test.cpp
   data_reader_HDF5_sample_list_test.cpp
+  data_reader_synthetic_test_public_api.cpp
   )
 
 set(LBANN_SEQ_CATCH2_TEST_FILES
diff --git a/src/data_readers/unit_test/data_reader_smiles_fetch_datum_test.cpp b/src/data_readers/unit_test/data_reader_smiles_fetch_datum_test.cpp
index d4f474dd161..a38915ea8ba 100644
--- a/src/data_readers/unit_test/data_reader_smiles_fetch_datum_test.cpp
+++ b/src/data_readers/unit_test/data_reader_smiles_fetch_datum_test.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -190,8 +190,6 @@ TEST_CASE("functional black-box", "[.filesystem][data reader][mpi][smiles]")
   //=========================================================================
   // instantiate and setup the data reader
   //=========================================================================
-  const int Max_seq_len = 56;
-
   lbann_data::LbannPB my_proto;
   if (!pb::TextFormat::ParseFromString(smiles_reader_prototext, &my_proto)) {
     throw "Parsing protobuf failed.";
@@ -199,11 +197,11 @@ TEST_CASE("functional black-box", "[.filesystem][data reader][mpi][smiles]")
 
   // set up the options that the reader expects
   // TODO MRW
-  //opts->set_option("use_data_store", true);
-  //opts->set_option("preload_data_store", true);
-  //opts->set_option("sequence_length", Max_seq_len);
-  //opts->set_option("vocab", vocab_fn);
-  //opts->set_option("prototext", prototext_fn);
+  // opts->set_option("use_data_store", true);
+  // opts->set_option("preload_data_store", true);
+  // opts->set_option("sequence_length", Max_seq_len);
+  // opts->set_option("vocab", vocab_fn);
+  // opts->set_option("prototext", prototext_fn);
 
   // instantiate and load the data readers
   std::map<lbann::execution_mode, lbann::generic_data_reader*> data_readers;
diff --git a/src/data_readers/unit_test/data_reader_synthetic_test.cpp b/src/data_readers/unit_test/data_reader_synthetic_test.cpp
new file mode 100644
index 00000000000..7c64b224672
--- /dev/null
+++ b/src/data_readers/unit_test/data_reader_synthetic_test.cpp
@@ -0,0 +1,194 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include <catch2/catch.hpp>
+
+#include "TestHelpers.hpp"
+#include "lbann/proto/proto_common.hpp"
+#include <google/protobuf/text_format.h>
+#include <lbann.pb.h>
+
+#include <cstdlib>
+#include <errno.h>
+#include <string.h>
+
+//#include "./data_reader_common_catch2.hpp"
+#include "lbann/data_readers/data_reader_synthetic.hpp"
+#include "lbann/data_readers/utils/input_data_type.hpp"
+#include "lbann/utils/hash.hpp"
+#include "lbann/utils/threads/thread_pool.hpp"
+#include "lbann/utils/threads/thread_utils.hpp"
+
+class DataReaderSyntheticWhiteboxTester
+{
+public:
+  bool fetch_datum(lbann::data_reader_synthetic& dr,
+                   lbann::CPUMat& X,
+                   int data_id,
+                   int mb_idx)
+  {
+    return dr.fetch_datum(X, data_id, mb_idx);
+  }
+  bool fetch_label(lbann::data_reader_synthetic& dr,
+                   lbann::CPUMat& Y,
+                   int data_id,
+                   int mb_idx)
+  {
+    return dr.fetch_label(Y, data_id, mb_idx);
+  }
+  bool fetch_response(lbann::data_reader_synthetic& dr,
+                      lbann::CPUMat& Y,
+                      int data_id,
+                      int mb_idx)
+  {
+    return dr.fetch_response(Y, data_id, mb_idx);
+  }
+};
+
+TEST_CASE("Synthetic data reader classification tests",
+          "[data_reader][synthetic][classification]")
+{
+  // initialize stuff (boilerplate)
+  lbann::init_random(42, 1);
+  lbann::init_data_seq_random(42);
+
+  DataReaderSyntheticWhiteboxTester white_box_tester;
+
+  // Create a local copy of the RNG to check the synthetic data reader
+  lbann::fast_rng_gen ref_fast_generator;
+  ref_fast_generator.seed(lbann::hash_combine(42, 0));
+
+  auto s = GENERATE(range(1, 11));
+  El::Int num_samples = s;
+  std::vector<int> dims = {s, s};
+  ;
+  El::Int num_labels = s * 2;
+
+  SECTION("fetch data and label")
+  {
+    auto dr = std::make_unique<lbann::data_reader_synthetic>(num_samples,
+                                                             dims,
+                                                             num_labels,
+                                                             false);
+    lbann::CPUMat X;
+    X.Resize(dims[0] * dims[1], num_samples);
+    lbann::CPUMat Y;
+    Y.Resize(num_labels, num_samples);
+    El::Zeros_seq(Y, num_labels, num_samples);
+
+    auto io_rng = lbann::set_io_generators_local_index(0);
+    for (auto j = 0; j < num_samples; j++) {
+      white_box_tester.fetch_datum(*dr, X, 0, j);
+      white_box_tester.fetch_label(*dr, Y, 0, j);
+    }
+
+    for (El::Int j = 0; j < num_samples; j++) {
+      // Create a new normal distribution for each sample.  This ensures
+      // that the behavior matches the implementation in the synthetic data
+      // reader and handles the case of odd numbers of entries with a normal
+      // distribution implementation. (Specifically that entries for a
+      // normal distribution are generated in pairs.)
+      std::normal_distribution<lbann::DataType> dist(float(0), float(1));
+      for (El::Int i = 0; i < X.Height(); i++) {
+        CHECK(X(i, j) == dist(ref_fast_generator));
+      }
+
+      auto index = lbann::fast_rand_int(ref_fast_generator, num_labels);
+      for (El::Int i = 0; i < Y.Height(); i++) {
+        if (index == i) {
+          CHECK(Y(i, j) == 1);
+        }
+        else {
+          CHECK(Y(i, j) == 0);
+        }
+      }
+    }
+  }
+}
+
+TEST_CASE("Synthetic data reader regression tests",
+          "[data_reader][synthetic][regression]")
+{
+  // initialize stuff (boilerplate)
+  lbann::init_random(42, 1);
+  lbann::init_data_seq_random(42);
+
+  DataReaderSyntheticWhiteboxTester white_box_tester;
+
+  // Create a local copy of the RNG to check the synthetic data reader
+  lbann::fast_rng_gen ref_fast_generator;
+  ref_fast_generator.seed(lbann::hash_combine(42, 0));
+
+  auto s = GENERATE(range(1, 11));
+  El::Int num_samples = s;
+  std::vector<int> dims = {s, s};
+  ;
+  std::vector<int> response_dims = {s + 1, s + 1};
+
+  SECTION("fetch data and response")
+  {
+    auto dr = std::make_unique<lbann::data_reader_synthetic>(num_samples,
+                                                             dims,
+                                                             response_dims,
+                                                             false);
+
+    lbann::CPUMat X;
+    X.Resize(dims[0] * dims[1], num_samples);
+    lbann::CPUMat Y;
+    Y.Resize(response_dims[0] * response_dims[1], num_samples);
+
+    auto io_rng = lbann::set_io_generators_local_index(0);
+    for (El::Int i = 0; i < num_samples; i++) {
+      white_box_tester.fetch_datum(*dr, X, 0, i);
+      white_box_tester.fetch_response(*dr, Y, 0, i);
+    }
+
+    for (El::Int j = 0; j < num_samples; j++) {
+      {
+        // Create a new normal distribution for each sample.  This ensures
+        // that the behavior matches the implementation in the synthetic data
+        // reader and handles the case of odd numbers of entries with a normal
+        // distribution implementation. (Specifically that entries for a
+        // normal distribution are generated in pairs.)
+        std::normal_distribution<lbann::DataType> dist(float(0), float(1));
+        for (El::Int i = 0; i < X.Height(); i++) {
+          CHECK(X(i, j) == dist(ref_fast_generator));
+        }
+      }
+      {
+        // Create a new normal distribution for each sample.  This ensures
+        // that the behavior matches the implementation in the synthetic data
+        // reader and handles the case of odd numbers of entries with a normal
+        // distribution implementation. (Specifically that entries for a
+        // normal distribution are generated in pairs.)
+        std::normal_distribution<lbann::DataType> dist(float(0), float(1));
+        for (El::Int i = 0; i < Y.Height(); i++) {
+          CHECK(Y(i, j) == dist(ref_fast_generator));
+        }
+      }
+    }
+  }
+}
diff --git a/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp b/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
new file mode 100644
index 00000000000..f353ab28ad5
--- /dev/null
+++ b/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
@@ -0,0 +1,189 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "TestHelpers.hpp"
+#include "lbann/proto/proto_common.hpp"
+#include <google/protobuf/text_format.h>
+#include <lbann.pb.h>
+
+#include <cstdlib>
+#include <errno.h>
+#include <string.h>
+
+//#include "./data_reader_common_catch2.hpp"
+#include "lbann/data_readers/data_reader_synthetic.hpp"
+#include "lbann/data_readers/utils/input_data_type.hpp"
+#include "lbann/utils/hash.hpp"
+#include "lbann/utils/threads/thread_pool.hpp"
+#include "lbann/utils/threads/thread_utils.hpp"
+
+class DataReaderSyntheticWhiteboxTester
+{
+public:
+  bool fetch_datum(lbann::data_reader_synthetic& dr,
+                   lbann::CPUMat& X,
+                   int data_id,
+                   int mb_idx)
+  {
+    return dr.fetch_datum(X, data_id, mb_idx);
+  }
+  bool fetch_label(lbann::data_reader_synthetic& dr,
+                   lbann::CPUMat& Y,
+                   int data_id,
+                   int mb_idx)
+  {
+    return dr.fetch_label(Y, data_id, mb_idx);
+  }
+  bool fetch_response(lbann::data_reader_synthetic& dr,
+                      lbann::CPUMat& Y,
+                      int data_id,
+                      int mb_idx)
+  {
+    return dr.fetch_response(Y, data_id, mb_idx);
+  }
+};
+
+TEST_CASE("Synthetic data reader public API tests",
+          "[mpi][data_reader][synthetic][public]")
+{
+  // initialize stuff (boilerplate)
+  auto& comm = unit_test::utilities::current_world_comm();
+  lbann::init_random(42, 1);
+  lbann::init_data_seq_random(42);
+
+  // Create a local copy of the RNG to check the synthetic data reader
+  lbann::fast_rng_gen ref_fast_generator;
+  ref_fast_generator.seed(lbann::hash_combine(42, 0));
+
+  // Initialize a per-trainer I/O thread pool
+  auto io_thread_pool = lbann::make_unique<lbann::thread_pool>();
+  io_thread_pool->launch_pinned_threads(1, 1);
+
+  std::set<std::string> active_data_fields = {"samples"};
+  active_data_fields.insert(
+    GENERATE(std::string("labels"), std::string("responses")));
+  auto s = GENERATE(range(1, 11));
+  El::Int num_samples = s;
+  std::vector<int> dims = {s, s};
+  El::Int num_labels = s * 2;
+  std::vector<int> response_dims = {s + 1, s + 1};
+
+  std::map<lbann::data_field_type, std::unique_ptr<lbann::CPUMat>>
+    owning_local_input_buffers;
+  std::map<lbann::data_field_type, lbann::CPUMat*> local_input_buffers;
+  for (auto& data_field : active_data_fields) {
+    auto local_mat = std::make_unique<lbann::CPUMat>();
+    if (data_field == INPUT_DATA_TYPE_SAMPLES) {
+      local_mat->Resize(dims[0] * dims[1], num_samples);
+      El::Zeros_seq(*local_mat, dims[0] * dims[1], num_samples);
+    }
+    else if (data_field == INPUT_DATA_TYPE_LABELS) {
+      local_mat->Resize(num_labels, num_samples);
+      El::Zeros_seq(*local_mat, num_labels, num_samples);
+    }
+    else if (data_field == INPUT_DATA_TYPE_RESPONSES) {
+      local_mat->Resize(response_dims[0] * response_dims[1], num_samples);
+    }
+    local_input_buffers[data_field] = local_mat.get();
+    owning_local_input_buffers[data_field] = std::move(local_mat);
+  }
+  El::Matrix<El::Int> indices_fetched;
+  El::Zeros_seq(indices_fetched, num_samples, 1);
+
+  SECTION("fetch data fields")
+  {
+    std::unique_ptr<lbann::data_reader_synthetic> dr;
+    if (owning_local_input_buffers.find(INPUT_DATA_TYPE_LABELS) !=
+        owning_local_input_buffers.end()) {
+      dr = std::make_unique<lbann::data_reader_synthetic>(num_samples,
+                                                          dims,
+                                                          num_labels,
+                                                          false);
+    }
+    else if (owning_local_input_buffers.find(INPUT_DATA_TYPE_RESPONSES) !=
+             owning_local_input_buffers.end()) {
+      dr = std::make_unique<lbann::data_reader_synthetic>(num_samples,
+                                                          dims,
+                                                          response_dims,
+                                                          false);
+    }
+    else {
+      LBANN_ERROR("Unknown data field");
+    }
+    dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
+    dr->set_rank(0);
+    dr->set_comm(&comm);
+    dr->set_num_parallel_readers(1);
+    dr->load();
+    dr->set_mini_batch_size(num_samples);
+    dr->set_last_mini_batch_size(num_samples);
+    dr->set_initial_position();
+
+    dr->fetch(local_input_buffers, indices_fetched);
+
+    // for (auto& [field, buf] : local_input_buffers) {
+    //   std::cout << "For field " << field << std::endl;
+    //   El::Print(*buf);
+    // }
+
+    // Check all of the results that were fetched.  Ensure that the
+    // data fields are accessed in the same order that they are in the map
+    for (El::Int j = 0; j < num_samples; j++) {
+      for (auto& data_field : active_data_fields) {
+        if (data_field == INPUT_DATA_TYPE_SAMPLES ||
+            data_field == INPUT_DATA_TYPE_RESPONSES) {
+          auto& X = *(local_input_buffers[data_field]);
+          // Create a new normal distribution for each sample.  This ensures
+          // that the behavior matches the implementation in the synthetic
+          // data reader and handles the case of odd numbers of entries with a
+          // normal distribution implementation. (Specifically that entries
+          // for a normal distribution are generated in pairs.)
+          std::normal_distribution<lbann::DataType> dist(float(0), float(1));
+          for (El::Int i = 0; i < X.Height(); i++) {
+            CHECK(X(i, j) == dist(ref_fast_generator));
+          }
+        }
+        else if (data_field == INPUT_DATA_TYPE_LABELS) {
+          auto& Y = *(local_input_buffers[INPUT_DATA_TYPE_LABELS]);
+          auto index = lbann::fast_rand_int(ref_fast_generator, num_labels);
+          // std::cout << "Here is the reference value " << index <<
+          // std::endl;
+          for (El::Int i = 0; i < Y.Height(); i++) {
+            if (index == i) {
+              CHECK(Y(i, j) == 1);
+            }
+            else {
+              CHECK(Y(i, j) == 0);
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/src/data_readers/utils/CMakeLists.txt b/src/data_readers/utils/CMakeLists.txt
deleted file mode 100644
index 1b5e0cc323c..00000000000
--- a/src/data_readers/utils/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-# Add the source files for this directory
-set_full_path(THIS_DIR_SOURCES
-  input_data_type.cpp
-  )
-
-# Propagate the files up the tree
-set(SOURCES "${SOURCES}" "${THIS_DIR_SOURCES}" PARENT_SCOPE)
diff --git a/src/data_readers/utils/input_data_type.cpp b/src/data_readers/utils/input_data_type.cpp
deleted file mode 100644
index 29f3fe9ee78..00000000000
--- a/src/data_readers/utils/input_data_type.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#include <lbann/data_readers/utils/input_data_type.hpp>
-
-namespace lbann {
-
-std::string to_string(input_data_type const& idl) {
-  switch (idl) {
-  case input_data_type::SAMPLES:
-    return "samples";
-  case input_data_type::LABELS:
-    return "labels";
-  case input_data_type::RESPONSES:
-    return "responses";
-  }
-  return "invalid input_data_type";
-}
-
-}
diff --git a/src/data_store/data_store_conduit.cpp b/src/data_store/data_store_conduit.cpp
index e57aa5f48f3..b8c4ed237de 100644
--- a/src/data_store/data_store_conduit.cpp
+++ b/src/data_store/data_store_conduit.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -84,8 +84,8 @@ data_store_conduit::data_store_conduit(
     LBANN_ERROR("data_store_conduit is throwing a fake exception; this is for use during testing");
   }
 
-  if (arg_parser.get<std::string>(DATA_STORE_TEST_CHECKPOINT) != ""
-      && arg_parser.get<std::string>(DATA_STORE_SPILL) != "") {
+  if (arg_parser.get<std::string>(DATA_STORE_TEST_CHECKPOINT) != "" &&
+      arg_parser.get<std::string>(DATA_STORE_SPILL) != "") {
     LBANN_ERROR("you passed both --data_store_test_checkpoint and --data_store_spill; please use one or the other or none, but not both");
   }
   if (arg_parser.get<std::string>(DATA_STORE_TEST_CHECKPOINT) != "") {
@@ -1866,7 +1866,8 @@ void data_store_conduit::open_informational_files() {
   }
 
   // optionally, each <rank, reader_role> pair opens a debug file
-  if (arg_parser.get<bool>(DATA_STORE_DEBUG) && !m_debug && m_reader != nullptr) {
+  if (arg_parser.get<bool>(DATA_STORE_DEBUG) && !m_debug &&
+      m_reader != nullptr) {
     m_debug_filename = m_debug_filename_base + "_" + m_reader->get_role() + "." + std::to_string(m_comm->get_rank_in_world()) + ".txt";
     m_debug = new std::ofstream(m_debug_filename.c_str());
     if (!m_debug) {
@@ -1875,7 +1876,8 @@ void data_store_conduit::open_informational_files() {
   }
 
   // optionally, <P_0, reader_role> pair opens a file for writing
-  if (arg_parser.get<bool>(DATA_STORE_PROFILE) && m_world_master && !m_profile && m_reader != nullptr) {
+  if (arg_parser.get<bool>(DATA_STORE_PROFILE) && m_world_master &&
+      !m_profile && m_reader != nullptr) {
     m_profile_filename = m_profile_filename_base + "_" + m_reader->get_role() + ".txt";
     m_profile = new std::ofstream(m_profile_filename.c_str());
     if (!m_profile) {
diff --git a/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp b/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp
index 0649f5e3aff..acb3687b7cc 100644
--- a/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp
+++ b/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp
@@ -138,8 +138,8 @@ RandomPairwiseExchange::RandomPairwiseExchange(
   std::unordered_map<std::string, metric_strategy> metrics,
   std::unique_ptr<ExchangeStrategy> comm_algo,
   std::unique_ptr<MutationStrategy> mutate_algo)
-  : m_metrics{std::move(metrics)}, 
-    m_comm_algo{std::move(comm_algo)}, 
+  : m_metrics{std::move(metrics)},
+    m_comm_algo{std::move(comm_algo)},
     m_mutate_algo{std::move(mutate_algo)}
 {
   LBANN_ASSERT(m_metrics.size());
@@ -151,13 +151,13 @@ RandomPairwiseExchange::RandomPairwiseExchange(
   std::unique_ptr<ExchangeStrategy> comm_algo,
   std::unique_ptr<MutationStrategy> mutate_algo)
   : RandomPairwiseExchange({{metric_name, winner_strategy}},
-                           std::move(comm_algo), 
+                           std::move(comm_algo),
                            std::move(mutate_algo))
 {}
 
 RandomPairwiseExchange::RandomPairwiseExchange(
   RandomPairwiseExchange const& other)
-  : m_metrics{other.m_metrics}, 
+  : m_metrics{other.m_metrics},
     m_comm_algo{other.m_comm_algo->clone()},
     m_mutate_algo{other.m_mutate_algo->clone()}
 {}
@@ -325,7 +325,7 @@ void RandomPairwiseExchange::select_next(model& m,
     m_mutate_algo->mutate(m, step);
 
     auto& trainer = get_trainer();
-    auto&& metadata = trainer.get_data_coordinator().get_dr_metadata();
+    auto&& metadata = dc.get_dr_metadata();
     m.setup(trainer.get_max_mini_batch_size(),
             metadata,
             /*force*/true);
@@ -438,7 +438,7 @@ make_null_mutation(google::protobuf::Message const& msg)
 {
   using NullMutation = lbann_data::MutationStrategy::NullMutation;
   LBANN_ASSERT(dynamic_cast<NullMutation const*>(&msg));
-  return std::make_unique<lbann::ltfb::NullMutation>();  
+  return std::make_unique<lbann::ltfb::NullMutation>();
 }
 
 std::unique_ptr<lbann::ltfb::ReplaceActivation>
@@ -505,7 +505,7 @@ lbann::make_abstract<lbann::ltfb::MutationStrategy>(
   using ProtoStrategy = lbann_data::MutationStrategy;
   auto const& params = dynamic_cast<ProtoStrategy const&>(msg);
 
-  auto const& mutate_params = 
+  auto const& mutate_params =
     proto::helpers::get_oneof_message(params, "strategy");
   return get_mutation_factory().create_object(
     proto::helpers::message_type(mutate_params),
@@ -535,8 +535,7 @@ lbann::make<lbann::ltfb::RandomPairwiseExchange>(
 
   using ExchangeStrategyType =
     lbann::ltfb::RandomPairwiseExchange::ExchangeStrategy;
-  using MutationStrategyType = 
-    lbann::ltfb::MutationStrategy;
+  using MutationStrategyType = lbann::ltfb::MutationStrategy;
   return make_unique<lbann::ltfb::RandomPairwiseExchange>(
     std::move(metric_map),
     make_abstract<ExchangeStrategyType>(msg.exchange_strategy()),
diff --git a/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp b/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp
index 1a2452cebba..a5e433df065 100644
--- a/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp
+++ b/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp
@@ -269,10 +269,10 @@ void TruncationSelectionExchange::select_next(model& m,
     auto& partner_model = *partner_model_ptr;
     unpack(partner_model, rcv_str);
     auto& trainer = get_trainer();
-    auto&& metadata = trainer.get_data_coordinator().get_dr_metadata();
+    auto&& metadata = dc.get_dr_metadata();
     m.setup(trainer.get_max_mini_batch_size(),
             metadata,
-            /*force=*/true);
+            /*force*/ true);
   }
 }
 
diff --git a/src/execution_algorithms/unit_test/inference_algorithm_test.cpp b/src/execution_algorithms/unit_test/inference_algorithm_test.cpp
index f2f8c4c8ecc..bfddd2b2f6a 100644
--- a/src/execution_algorithms/unit_test/inference_algorithm_test.cpp
+++ b/src/execution_algorithms/unit_test/inference_algorithm_test.cpp
@@ -30,9 +30,10 @@
 #include "MPITestHelpers.hpp"
 
 #include <lbann/base.hpp>
+#include <lbann/execution_algorithms/batch_functional_inference_algorithm.hpp>
 #include <lbann/models/directed_acyclic_graph.hpp>
 #include <lbann/models/model.hpp>
-#include <lbann/execution_algorithms/batch_functional_inference_algorithm.hpp>
+#include <lbann/utils/lbann_library.hpp>
 
 #include <lbann.pb.h>
 #include <google/protobuf/text_format.h>
@@ -48,7 +49,7 @@ model {
     name: "layer1"
     children: "layer2"
     input {
-      target_mode: "N/A"
+      data_field: "samples"
     }
   }
   layer {
@@ -76,6 +77,8 @@ auto make_model(lbann::lbann_comm& comm, int class_n)
   lbann_data::LbannPB my_proto;
   if (!pb::TextFormat::ParseFromString(model_prototext, &my_proto))
     throw "Parsing protobuf failed.";
+  // Construct a trainer so that the model can register the input layer
+  lbann::construct_trainer(&comm, my_proto.mutable_trainer(), my_proto);
   auto metadata = mock_datareader_metadata(class_n);
   auto my_model = lbann::proto::construct_model(&comm,
                                                 -1,
diff --git a/src/layers/io/cereal_registration/input_layer.cpp b/src/layers/io/cereal_registration/input_layer.cpp
index 5fa50c1efb4..e9ddaffc995 100644
--- a/src/layers/io/cereal_registration/input_layer.cpp
+++ b/src/layers/io/cereal_registration/input_layer.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -37,7 +37,7 @@ ::serialize(ArchiveT& ar)
   using DataTypeLayer = data_type_layer<TensorDataType>;
   ar(::cereal::make_nvp("DataTypeLayer",
                         ::cereal::base_class<DataTypeLayer>(this)),
-     CEREAL_NVP(m_data_reader_mode));
+     CEREAL_NVP(m_data_field));
 }
 
 } // namespace lbann
diff --git a/src/layers/io/input_layer.cpp b/src/layers/io/input_layer.cpp
index d075235127e..7cc9e9c9fd1 100644
--- a/src/layers/io/input_layer.cpp
+++ b/src/layers/io/input_layer.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -44,6 +44,10 @@ setup_dims(DataReaderMetaData& dr_metadata) {
   for (int i = 0; i < this->get_num_children(); ++i) {
     this->set_output_dims(get_data_dims(dr_metadata, i), i);
   }
+  if (m_data_field == "") {
+    LBANN_ERROR("Failed to setup input layer with empty data field");
+  }
+  get_trainer().get_data_coordinator().register_active_data_field(m_data_field);
 }
 
 template <typename TensorDataType,
@@ -106,26 +110,9 @@ void input_layer<TensorDataType, T_layout, Dev>::fp_compute()
       static_cast<buffered_data_coordinator<TensorDataType>&>(
         get_trainer().get_data_coordinator());
 
-    //  partitioned_io_buffer<TensorDataType>* io_buffer = dc.get_active_buffer(mode);
-    // generic_io_buffer<TensorDataType>* io_buffer = dc.m_io_buffers[dc.get_active_buffer_idx(mode) % dc.m_io_buffers.size()];
-
-    // if(dynamic_cast<partitioned_io_buffer<TensorDataType>*>(io_buffer) != nullptr) {
-    // Use the predetermined size of the mini-batch to set the current
-    // batch size for the neural network
-    int num_samples_in_batch = dc.get_current_mini_batch_size(mode);
-
-    dc.update_num_samples_processed(mode, num_samples_in_batch);
-    std::map<input_data_type, AbsDistMatrixType*> input_buffers;
-    input_buffers[input_data_type::SAMPLES] = &(this->get_activations(0));
-    if(this->m_expected_num_child_layers > 1) {
-      if(is_for_regression()) {
-        input_buffers[input_data_type::RESPONSES] = &(this->get_activations(1));
-      }else {
-        input_buffers[input_data_type::LABELS] = &(this->get_activations(1));
-      }
-    }
-
-    dc.distribute_from_local_matrix(mode, input_buffers);
+    dc.distribute_from_local_matrix(mode,
+                                    m_data_field,
+                                    this->get_activations(0));
 
 #ifdef LBANN_HAS_DISTCONV
     if (this->distconv_enabled()) {
@@ -149,13 +136,21 @@ template <typename TensorDataType,
           El::Device Dev>
 std::vector<int> input_layer<TensorDataType, T_layout, Dev>::
 get_data_dims(DataReaderMetaData& dr_metadata, int child_index) const {
-  if(child_index == 0) {
-    return dr_metadata.data_dims[data_reader_target_mode::INPUT];
-  }else if(child_index == 1) {
-    return dr_metadata.data_dims[this->m_data_reader_mode];
-  }else {
+  if (child_index != 0) {
     LBANN_ERROR("get_data_dims: Invalid child index");
   }
+  if (m_data_field == INPUT_DATA_TYPE_SAMPLES) {
+    return dr_metadata.data_dims[data_reader_target_mode::INPUT];
+  }
+  else if (m_data_field == INPUT_DATA_TYPE_LABELS) {
+    return dr_metadata.data_dims[data_reader_target_mode::CLASSIFICATION];
+  }
+  else if (m_data_field == INPUT_DATA_TYPE_RESPONSES) {
+    return dr_metadata.data_dims[data_reader_target_mode::REGRESSION];
+  }
+  else {
+    LBANN_ERROR("Unknown data_field_type value provided: " + m_data_field);
+  }
   return std::vector<int>(1, 0);
 }
 
diff --git a/src/layers/learning/base_convolution.cpp b/src/layers/learning/base_convolution.cpp
index c1b926bd3db..0aa07f89a2e 100644
--- a/src/layers/learning/base_convolution.cpp
+++ b/src/layers/learning/base_convolution.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/src/models/unit_test/lenet.prototext.inc b/src/models/unit_test/lenet.prototext.inc
index 0424f7babff..6d93ab87947 100644
--- a/src/models/unit_test/lenet.prototext.inc
+++ b/src/models/unit_test/lenet.prototext.inc
@@ -15,29 +15,23 @@ model {
   }
   num_epochs: 20
   layer {
-    name: "layer1"
-    children: "layer2 layer3"
+    name: "image"
+    data_layout: "data_parallel"
     input {
-      target_mode: "classification"
+      data_field: "samples"
     }
   }
+
   layer {
-    name: "layer3"
-    parents: "layer1"
-    children: "layer16 layer17"
-    identity {
-    }
-  }
-  layer {
-    name: "layer2"
-    parents: "layer1"
-    children: "layer4"
-    identity {
+    name: "label"
+    data_layout: "data_parallel"
+    input {
+      data_field: "labels"
     }
   }
   layer {
     name: "layer4"
-    parents: "layer2"
+    parents: "image"
     children: "layer5"
     convolution {
       num_dims: 2
@@ -149,13 +143,13 @@ model {
   }
   layer {
     name: "layer17"
-    parents: "layer15 layer3"
+    parents: "layer15 label"
     categorical_accuracy {
     }
   }
   layer {
     name: "layer16"
-    parents: "layer15 layer3"
+    parents: "layer15 label"
     cross_entropy {
     }
   }
diff --git a/src/models/unit_test/modify_test.cpp b/src/models/unit_test/modify_test.cpp
index 54d29500edc..7eaff04c264 100644
--- a/src/models/unit_test/modify_test.cpp
+++ b/src/models/unit_test/modify_test.cpp
@@ -34,6 +34,7 @@
 #include <lbann/base.hpp>
 #include <lbann/models/directed_acyclic_graph.hpp>
 #include <lbann/models/model.hpp>
+#include <lbann/utils/lbann_library.hpp>
 
 #include <google/protobuf/text_format.h>
 #include <lbann.pb.h>
@@ -61,6 +62,8 @@ auto make_model(lbann::lbann_comm& comm)
   lbann_data::LbannPB my_proto;
   if (!pb::TextFormat::ParseFromString(model_prototext, &my_proto))
     throw "Parsing protobuf failed.";
+  // Construct a trainer so that the model can register the input layer
+  lbann::construct_trainer(&comm, my_proto.mutable_trainer(), my_proto);
   auto metadata = mock_datareader_metadata();
   auto my_model = lbann::proto::construct_model(&comm,
                                                 -1,
diff --git a/src/proto/factories/layer_factory.cpp b/src/proto/factories/layer_factory.cpp
index 25ce063c3d7..f8ed69128b5 100644
--- a/src/proto/factories/layer_factory.cpp
+++ b/src/proto/factories/layer_factory.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -276,13 +276,7 @@ std::unique_ptr<Layer> construct_layer_legacy(
   // arguments.
   if (proto_layer.has_input()) {
     const auto& params = proto_layer.input();
-    const auto& mode_str = params.target_mode();
-    data_reader_target_mode target_mode = data_reader_target_mode::NA;
-    if (mode_str == "classification") { target_mode = data_reader_target_mode::CLASSIFICATION; }
-    if (mode_str == "regression")                         { target_mode = data_reader_target_mode::REGRESSION; }
-    if (mode_str == "reconstruction")                     { target_mode = data_reader_target_mode::RECONSTRUCTION; }
-    if (mode_str == "label_reconstruction")               { target_mode = data_reader_target_mode::LABEL_RECONSTRUCTION; }
-    if (mode_str.empty() || mode_str == "na" || mode_str == "NA" || mode_str == "N/A") { target_mode = data_reader_target_mode::NA; }
+    const auto& data_field = params.data_field();
     if (Layout != data_layout::DATA_PARALLEL) {
       LBANN_ERROR("input layer is only supported with "
                   "a data-parallel layout");
@@ -292,10 +286,9 @@ std::unique_ptr<Layer> construct_layer_legacy(
     /// this is not related to this PR.
     if ((typeid(TensorDataType) == typeid(DataType))
         && (Layout == data_layout::DATA_PARALLEL)) {
-      return lbann::make_unique<input_layer<DataType,
-                                            data_layout::DATA_PARALLEL,
-                                            Device>>(comm,
-                                                     target_mode);
+      return lbann::make_unique<
+        input_layer<DataType, data_layout::DATA_PARALLEL, Device>>(comm,
+                                                                   data_field);
     }
     else {
       LBANN_ERROR("Input layers are only valid with "
diff --git a/src/proto/layers.proto b/src/proto/layers.proto
index 311a2f38499..82e034eafbe 100644
--- a/src/proto/layers.proto
+++ b/src/proto/layers.proto
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -296,7 +296,7 @@ message Layer {
   // Input layers //
   //////////////////
   message Input {
-    string target_mode = 3;       // Options: "classification", "regression", "reconstruction", "label_reconstruction", "N/A" (default)
+    string data_field = 1; // legacy names are: samples, labels, responses
   }
 
   //////////////////////
diff --git a/src/proto/proto_common.cpp b/src/proto/proto_common.cpp
index d936922cd43..47e7780c18f 100644
--- a/src/proto/proto_common.cpp
+++ b/src/proto/proto_common.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -422,12 +422,14 @@ void init_data_readers(
         reader->set_absolute_sample_count(arg_parser.get<int>(TEST_TARBALL));
         reader->set_use_percent( 0. );
         reader->set_first_n(0);
-      } else {
+      }
+      else {
         reader->set_absolute_sample_count( 0. );
         reader->set_use_percent( 1.0 );
         reader->set_first_n( 0 );
       }
-    } else {
+    }
+    else {
       reader->set_absolute_sample_count( readme.absolute_sample_count() );
       reader->set_use_percent( readme.percent_of_data_to_use() );
       reader->set_first_n( readme.first_n() );
@@ -450,12 +452,14 @@ void init_data_readers(
     if (readme.role() == "train") {
       if (arg_parser.get<bool>(CREATE_TARBALL) || separate_validation) {
         reader->set_execution_mode_split_percent(execution_mode::validation, 0. );
-      } else {
+      }
+      else {
         reader->set_execution_mode_split_percent(execution_mode::validation, readme.validation_percent() );
       }
       if (arg_parser.get<bool>(CREATE_TARBALL) || separate_tournament) {
         reader->set_execution_mode_split_percent(execution_mode::tournament, 0. );
-      } else {
+      }
+      else {
         reader->set_execution_mode_split_percent(execution_mode::tournament, readme.tournament_percent() );
       }
     }
@@ -721,7 +725,8 @@ void set_data_readers_filenames(
       s << "data_filedir_" << which;
       if (arg_parser.get<std::string>(s.str()) != "") {
         r->set_data_filedir(arg_parser.get<std::string>(s.str()));
-      }else {
+      }
+      else {
         s.clear();
         s.str("");
         s << "data_filedir";
@@ -829,22 +834,22 @@ void get_cmdline_overrides(const lbann_comm& comm, lbann_data::LbannPB& p)
     }
   }
 
-  if ((arg_parser.get<std::string>(DATA_FILEDIR) != "")
-      or (arg_parser.get<std::string>(DATA_FILEDIR_TRAIN) != "")
-      or (arg_parser.get<std::string>(DATA_FILENAME_TRAIN) != "")
-      or (arg_parser.get<std::string>(LABEL_FILENAME_TRAIN) != "")) {
+  if ((arg_parser.get<std::string>(DATA_FILEDIR) != "") or
+      (arg_parser.get<std::string>(DATA_FILEDIR_TRAIN) != "") or
+      (arg_parser.get<std::string>(DATA_FILENAME_TRAIN) != "") or
+      (arg_parser.get<std::string>(LABEL_FILENAME_TRAIN) != "")) {
     set_data_readers_filenames("train", p);
   }
-  if ((arg_parser.get<std::string>(DATA_FILEDIR) != "")
-      or (arg_parser.get<std::string>(DATA_FILEDIR_VALIDATE) != "")
-      or (arg_parser.get<std::string>(DATA_FILENAME_VALIDATE) != "")
-      or (arg_parser.get<std::string>(LABEL_FILENAME_VALIDATE) != "")) {
+  if ((arg_parser.get<std::string>(DATA_FILEDIR) != "") or
+      (arg_parser.get<std::string>(DATA_FILEDIR_VALIDATE) != "") or
+      (arg_parser.get<std::string>(DATA_FILENAME_VALIDATE) != "") or
+      (arg_parser.get<std::string>(LABEL_FILENAME_VALIDATE) != "")) {
     set_data_readers_filenames("validate", p);
   }
-  if ((arg_parser.get<std::string>(DATA_FILEDIR) != "")
-      or (arg_parser.get<std::string>(DATA_FILEDIR_TEST) != "")
-      or (arg_parser.get<std::string>(DATA_FILENAME_TEST) != "")
-      or (arg_parser.get<std::string>(LABEL_FILENAME_TEST) != "")) {
+  if ((arg_parser.get<std::string>(DATA_FILEDIR) != "") or
+      (arg_parser.get<std::string>(DATA_FILEDIR_TEST) != "") or
+      (arg_parser.get<std::string>(DATA_FILENAME_TEST) != "") or
+      (arg_parser.get<std::string>(LABEL_FILENAME_TEST) != "")) {
     set_data_readers_filenames("test", p);
   }
   if (arg_parser.get<std::string>(SAMPLE_LIST_TRAIN) != "") {
@@ -878,7 +883,8 @@ void get_cmdline_overrides(const lbann_comm& comm, lbann_data::LbannPB& p)
     trainer->set_hydrogen_block_size(arg_parser.get<int>(HYDROGEN_BLOCK_SIZE));
   }
   if (arg_parser.get<int>(NUM_PARALLEL_READERS) != -1) {
-    trainer->set_num_parallel_readers(arg_parser.get<int>(NUM_PARALLEL_READERS));
+    trainer->set_num_parallel_readers(
+      arg_parser.get<int>(NUM_PARALLEL_READERS));
   }
   if (arg_parser.get<bool>(DISABLE_CUDA)) {
     model->set_disable_cuda(arg_parser.get<bool>(DISABLE_CUDA));
@@ -886,10 +892,9 @@ void get_cmdline_overrides(const lbann_comm& comm, lbann_data::LbannPB& p)
   if (arg_parser.get<int>(RANDOM_SEED) == -1) {
     trainer->set_random_seed(arg_parser.get<int>(RANDOM_SEED));
   }
-  if(arg_parser.get<bool>(SERIALIZE_IO)) {
+  if (arg_parser.get<bool>(SERIALIZE_IO)) {
     trainer->set_serialize_io(arg_parser.get<bool>(SERIALIZE_IO));
   }
-
 }
 
 void print_parameters(const lbann_comm& comm,
diff --git a/src/trainers/trainer.cpp b/src/trainers/trainer.cpp
index aa837e1e074..731f0a04042 100644
--- a/src/trainers/trainer.cpp
+++ b/src/trainers/trainer.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -239,6 +239,7 @@ void trainer::train(observer_ptr<model> model,
   }
   DataReaderMetaData dr_metadata = get_data_coordinator().get_dr_metadata();
   m_training_alg->setup_models({model}, get_max_mini_batch_size(), dr_metadata);
+
   // FIXME (trb 04/27/2021): This is a hack to support the current
   // checkpoint/restart mechanisms. This needs to be refactored to be
   // agnostic to the training algorithm. At this time, only SGD is
@@ -276,8 +277,8 @@ void trainer::evaluate(observer_ptr<model> model,
   DataReaderMetaData dr_metadata = get_data_coordinator().get_dr_metadata();
   sgd->setup_models({model}, get_max_mini_batch_size(), dr_metadata);
 
-  if(m_comm->get_grid_type() == GridType::NO_GRID or 
-     m_comm->get_grid_type() == GridType::PRIMARY_GRID){
+  if (m_comm->get_grid_type() == GridType::NO_GRID or
+      m_comm->get_grid_type() == GridType::PRIMARY_GRID) {
     sgd->evaluate(*ctxt, *model, get_data_coordinator(), mode,
                   epoch_termination_criteria(/*num_epochs=*/1UL));
   }
diff --git a/src/utils/lbann_library.cpp b/src/utils/lbann_library.cpp
index 81447332246..222e843f810 100644
--- a/src/utils/lbann_library.cpp
+++ b/src/utils/lbann_library.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -131,9 +131,10 @@ void finalize_trainer() {
 }
 
 /// Construct a trainer that contains a lbann comm object and threadpool
-trainer& construct_trainer(lbann_comm *comm,
+trainer& construct_trainer(lbann_comm* comm,
                            lbann_data::Trainer* pb_trainer,
-                           lbann_data::LbannPB &pb) {
+                           lbann_data::LbannPB& pb)
+{
   if (pb_trainer->num_parallel_readers() > comm->get_procs_per_trainer()) {
     pb_trainer->set_num_parallel_readers(comm->get_procs_per_trainer());
   }
@@ -151,7 +152,8 @@ trainer& construct_trainer(lbann_comm *comm,
   }
 
   // Initalize a per-trainer I/O thread pool
-  std::unique_ptr<thread_pool> io_thread_pool = construct_io_thread_pool(comm, serialized_io);
+  std::unique_ptr<thread_pool> io_thread_pool =
+    construct_io_thread_pool(comm, serialized_io);
 
   // Setup I/O threads
   auto io_threads_per_process = io_thread_pool->get_num_threads();
@@ -277,7 +279,6 @@ trainer& construct_trainer(lbann_comm *comm,
     global_trainer_->allow_background_io_activity(false);
   }
 
-
   // Report useful information
   if (comm->am_world_master()) {
     print_lbann_configuration(comm,
@@ -295,7 +296,9 @@ trainer& construct_trainer(lbann_comm *comm,
 }
 
 // Setup I/O thread pool that is shared across all models
-  std::unique_ptr<thread_pool> construct_io_thread_pool(lbann_comm *comm, bool serialized_io) {
+std::unique_ptr<thread_pool> construct_io_thread_pool(lbann_comm* comm,
+                                                      bool serialized_io)
+{
   int max_io_threads = num_free_cores_per_process(comm);
   // Allow the trainer to override the command-line option or environment variable
   if(serialized_io) {
@@ -393,10 +396,12 @@ std::unique_ptr<model> build_model_from_prototext(
     }
 
     std::string active_load_model_dir;
-    std::string load_model_dir = arg_parser.get<std::string>(LOAD_MODEL_WEIGHTS_DIR);
-    if(arg_parser.get<bool>(LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE)) {
+    std::string load_model_dir =
+      arg_parser.get<std::string>(LOAD_MODEL_WEIGHTS_DIR);
+    if (arg_parser.get<bool>(LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE)) {
       active_load_model_dir = load_model_dir;
-    }else {
+    }
+    else {
       size_t epochLast = std::numeric_limits<size_t>::max();;
       size_t stepLast = std::numeric_limits<size_t>::max();;
       execution_mode mode = execution_mode::invalid;
diff --git a/src/utils/options.cpp b/src/utils/options.cpp
index 4f537b235c0..b65e3c6a58e 100644
--- a/src/utils/options.cpp
+++ b/src/utils/options.cpp
@@ -29,105 +29,106 @@
 
 namespace lbann {
 
-void construct_std_options() {
+void construct_std_options()
+{
   auto& arg_parser = global_argument_parser();
 
   // Bool flags
-  arg_parser.add_flag(DISABLE_BACKGROUND_IO_ACTIVITY,
-                      {"--disable_background_io_activity"},
-                      "[STD] prevent the input layers from fetching data in the background");
-  arg_parser.add_flag(DISABLE_CUDA,
-                      {"--disable_cuda"},
-                      "[STD] has no effect unless LBANN was compiled with LBANN_HAS_CUDNN");
-  arg_parser.add_flag(LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE,
-                      {"--load_model_weights_dir_is_complete"},
-                      "[STD] Use load_model_weights_dir as given, ignoring checkpoint hierarchy");
+  arg_parser.add_flag(
+    DISABLE_BACKGROUND_IO_ACTIVITY,
+    {"--disable_background_io_activity"},
+    "[STD] prevent the input layers from fetching data in the background");
+  arg_parser.add_flag(
+    DISABLE_CUDA,
+    {"--disable_cuda"},
+    "[STD] has no effect unless LBANN was compiled with LBANN_HAS_CUDNN");
+  arg_parser.add_flag(
+    LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE,
+    {"--load_model_weights_dir_is_complete"},
+    "[STD] Use load_model_weights_dir as given, ignoring checkpoint hierarchy");
   arg_parser.add_flag(LTFB_ALLOW_GLOBAL_STATISTICS,
                       {"--ltfb_allow_global_statistics"},
                       utils::ENV("LBANN_LTFB_ALLOW_GLOBAL_STATISTICS"),
                       "[STD] Allow the print_statistics callback to report "
                       "global (inter-trainer) summary statistics.");
-  arg_parser.add_flag(LTFB_VERBOSE,
-                      {"--ltfb_verbose"},
-                      "[STD] Increases number of per-trainer messages that are reported");
-  arg_parser.add_flag(NO_IM_COMM,
-                      {"--no_im_comm"},
-                      "[STD] removed ImComm callback, if present; this is intended for"
-                      "running alexnet with a single model, but may be useful elsewhere");
+  arg_parser.add_flag(
+    LTFB_VERBOSE,
+    {"--ltfb_verbose"},
+    "[STD] Increases number of per-trainer messages that are reported");
+  arg_parser.add_flag(
+    NO_IM_COMM,
+    {"--no_im_comm"},
+    "[STD] removed ImComm callback, if present; this is intended for "
+    "running alexnet with a single model, but may be useful elsewhere");
   arg_parser.add_flag(PRELOAD_DATA_STORE,
                       {"--preload_data_store"},
-                      "[STD] Preloads the data store in-memory structure druing data reader load time");
-  arg_parser.add_flag(PRINT_AFFINITY,
-                      {"--print_affinity"},
-                      "[STD] display information on how OpenMP threads are provisioned");
-  arg_parser.add_flag(SERIALIZE_IO,
-                      {"--serialize_io"},
-                      "[STD] force data readers to use a single threaded for I/O");
-  arg_parser.add_flag(ST_FULL_TRACE,
-                      {"--st_full_trace"},
-                      "[STD] TODO");
-  arg_parser.add_flag(ST_ON,
-                      {"--st_on"},
-                      "[STD] TODO");
-	arg_parser.add_flag(USE_CUBLAS_TENSOR_OPS,
-											{"--use-cublas-tensor-ops"},
-											utils::ENV("LBANN_USE_CUBLAS_TENSOR_OPS"),
-											"[STD] Set the default cuBLAS math mode to use "
-											"Tensor Core operations when available.");
-	arg_parser.add_flag(USE_CUDNN_TENSOR_OPS,
-											{"--use-cudnn-tensor-ops"},
-											utils::ENV("LBANN_USE_CUDNN_TENSOR_OPS"),
-											"[STD] Set the default cuDNN math mode to use "
-											"Tensor Core operations when available.");
+                      "[STD] Preloads the data store in-memory structure "
+                      "during data reader load time");
+  arg_parser.add_flag(
+    PRINT_AFFINITY,
+    {"--print_affinity"},
+    "[STD] display information on how OpenMP threads are provisioned");
+  arg_parser.add_flag(
+    SERIALIZE_IO,
+    {"--serialize_io"},
+    "[STD] force data readers to use a single thread for I/O");
+  arg_parser.add_flag(ST_FULL_TRACE, {"--st_full_trace"}, "[STD] TODO");
+  arg_parser.add_flag(ST_ON, {"--st_on"}, "[STD] TODO");
+  arg_parser.add_flag(USE_CUBLAS_TENSOR_OPS,
+                      {"--use-cublas-tensor-ops"},
+                      utils::ENV("LBANN_USE_CUBLAS_TENSOR_OPS"),
+                      "[STD] Set the default cuBLAS math mode to use "
+                      "Tensor Core operations when available.");
+  arg_parser.add_flag(USE_CUDNN_TENSOR_OPS,
+                      {"--use-cudnn-tensor-ops"},
+                      utils::ENV("LBANN_USE_CUDNN_TENSOR_OPS"),
+                      "[STD] Set the default cuDNN math mode to use "
+                      "Tensor Core operations when available.");
   arg_parser.add_flag(USE_DATA_STORE,
                       {"--use_data_store"},
                       "[STD] Enables the data store in-memory structure");
-  arg_parser.add_flag(USE_LTFB,
-                      {"--ltfb"},
-                      "[STD] TODO");
+  arg_parser.add_flag(USE_LTFB, {"--ltfb"}, "[STD] TODO");
   arg_parser.add_flag(VERBOSE,
                       {"--verbose", "--verbose_print"},
                       "[STD] Turns on verbose mode");
   arg_parser.add_flag(WRITE_SAMPLE_LIST,
                       {"--write_sample_list"},
-                      "[STD] Writes out the sample list that was loaded into the current directory");
+                      "[STD] Writes out the sample list that was loaded into "
+                      "the current directory");
 
   // Input options
-  arg_parser.add_option(CKPT_DIR,
-                        {"--checkpoint_dir", "--ckpt_dir"},
-                        "[STD] Save to or restart from a specific checkpoint directory.\n"
-                        "Additionally, sets the output directory for dumping weights.\n"
-                        "Modifies callbacks: checkpoint, save_model, dump_weights\n",
-                        "");
+  arg_parser.add_option(
+    CKPT_DIR,
+    {"--checkpoint_dir", "--ckpt_dir"},
+    "[STD] Save to or restart from a specific checkpoint directory.\n"
+    "Additionally, sets the output directory for dumping weights.\n"
+    "Modifies callbacks: checkpoint, save_model, dump_weights\n",
+    "");
   arg_parser.add_option(HYDROGEN_BLOCK_SIZE,
                         {"--hydrogen_block_size"},
                         "[STD] Block size for Hydrogen",
                         0);
-  arg_parser.add_option(LOAD_MODEL_WEIGHTS_DIR,
-                        {"--load_model_weights_dir"},
-                        "[STD] Load model wieghts found in the given directory.\n"
-                        "If the directory doesn't exist, doesn't contain valid weights,\n"
-                        "or doesn't contain a checkpoint,\n"
-                        "an error will be thrown.\n",
-                        "");
-  arg_parser.add_option(MAX_RNG_SEEDS_DISPLAY,
-                        {"--rng_seeds_per_trainer_to_display"},
-                        utils::ENV("LBANN_RNG_SEEDS_PER_TRAINER_TO_DISPLAY"),
-                        "[STD] Limit how many random seeds LBANN should display "
-                        "from each trainer",
-                        2);
-  arg_parser.add_option(METADATA,
-                        {"--metadata"},
-                        "[STD] TODO",
-                        "");
+  arg_parser.add_option(
+    LOAD_MODEL_WEIGHTS_DIR,
+    {"--load_model_weights_dir"},
+    "[STD] Load model weights found in the given directory.\n"
+    "If the directory doesn't exist, doesn't contain valid weights,\n"
+    "or doesn't contain a checkpoint,\n"
+    "an error will be thrown.\n",
+    "");
+  arg_parser.add_option(
+    MAX_RNG_SEEDS_DISPLAY,
+    {"--rng_seeds_per_trainer_to_display"},
+    utils::ENV("LBANN_RNG_SEEDS_PER_TRAINER_TO_DISPLAY"),
+    "[STD] Limit how many random seeds LBANN should display "
+    "from each trainer",
+    2);
+  arg_parser.add_option(METADATA, {"--metadata"}, "[STD] TODO", "");
   arg_parser.add_option(MINI_BATCH_SIZE,
                         {"--mini_batch_size"},
                         "[STD] Size of mini batches",
                         -1);
-  arg_parser.add_option(MODEL,
-                        {"--model"},
-                        "[STD] TODO",
-                        "");
+  arg_parser.add_option(MODEL, {"--model"}, "[STD] TODO", "");
   arg_parser.add_option(NUM_EPOCHS,
                         {"--num_epochs"},
                         "[STD] Number of epochs to train model",
@@ -157,10 +158,7 @@ void construct_std_options() {
                         utils::ENV("LBANN_NUM_VALIDATE_SAMPLES"),
                         "[STD] Set the number of validate samples to ingest.",
                         -1);
-  arg_parser.add_option(OPTIMIZER,
-                        {"--optimizer"},
-                        "[STD] TODO",
-                        "");
+  arg_parser.add_option(OPTIMIZER, {"--optimizer"}, "[STD] TODO", "");
   arg_parser.add_option(PROCS_PER_TRAINER,
                         {"--procs_per_trainer"},
                         utils::ENV("LBANN_PROCS_PER_TRAINER"),
@@ -180,22 +178,21 @@ void construct_std_options() {
                         {"--random_seed", "--rand_seed"},
                         "[STD] Value to seed RNG",
                         -1);
-  arg_parser.add_option(READER,
-                        {"--reader"},
-                        "[STD] TODO",
-                        "");
-  arg_parser.add_option(RESTART_DIR,
-                        {"--restart_dir"},
-                        "[STD] Restart from a checkpoint found in the given directory.\n"
-                        "If the directory doesn't exist or doesn't contain a checkpoint,\n"
-                        "an error will be thrown.\n",
-                        "");
-  arg_parser.add_option(TRAINER_CREATE_TWO_MODELS,
-                        {"--trainer_create_two_models"},
-                        utils::ENV("LBANN_TRAINER_CREATE_TWO_MODELS"),
-                        "[STD] Create two models (one each for primary and secondary grid). "
-                        "Default is False.",
-                        false);
+  arg_parser.add_option(READER, {"--reader"}, "[STD] TODO", "");
+  arg_parser.add_option(
+    RESTART_DIR,
+    {"--restart_dir"},
+    "[STD] Restart from a checkpoint found in the given directory.\n"
+    "If the directory doesn't exist or doesn't contain a checkpoint,\n"
+    "an error will be thrown.\n",
+    "");
+  arg_parser.add_option(
+    TRAINER_CREATE_TWO_MODELS,
+    {"--trainer_create_two_models"},
+    utils::ENV("LBANN_TRAINER_CREATE_TWO_MODELS"),
+    "[STD] Create two models (one each for primary and secondary grid). "
+    "Default is False.",
+    false);
   arg_parser.add_option(TRAINER_GRID_HEIGHT,
                         {"--trainer_grid_height"},
                         utils::ENV("LBANN_TRAINER_GRID_HEIGHT"),
@@ -210,7 +207,8 @@ void construct_std_options() {
                         0);
 }
 
-void construct_datastore_options() {
+void construct_datastore_options()
+{
   auto& arg_parser = global_argument_parser();
 
   // Bool flags
@@ -247,16 +245,15 @@ void construct_datastore_options() {
                         "");
 }
 
-void construct_datareader_options() {
+void construct_datareader_options()
+{
   auto& arg_parser = global_argument_parser();
 
   // Bool flags
   arg_parser.add_flag(ALL_GATHER_OLD,
                       {"--all_gather_old"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(CHECK_DATA,
-                      {"--check_data"},
-                      "[DATAREADER] TODO");
+  arg_parser.add_flag(CHECK_DATA, {"--check_data"}, "[DATAREADER] TODO");
   arg_parser.add_flag(CREATE_TARBALL,
                       {"--create_tarball"},
                       "[DATAREADER] TODO");
@@ -287,31 +284,26 @@ void construct_datareader_options() {
   arg_parser.add_flag(NODE_SIZES_VARY,
                       {"--node_sizes_vary"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(QUIET,
-                      {"--quiet"},
-                      "[DATAREADER] TODO");
+  arg_parser.add_flag(QUIET, {"--quiet"}, "[DATAREADER] TODO");
   arg_parser.add_flag(STACK_TRACE_TO_FILE,
                       {"--stack_trace_to_file"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(TEST_ENCODE,
-                      {"--test_encode"},
-                      "[DATAREADER] TODO");
+  arg_parser.add_flag(TEST_ENCODE, {"--test_encode"}, "[DATAREADER] TODO");
   arg_parser.add_flag(WRITE_SAMPLE_LABEL_LIST,
                       {"--write_sample_label_list"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(Z_SCORE,
-                      {"--z_score"},
-                      "[DATAREADER] TODO");
+  arg_parser.add_flag(Z_SCORE, {"--z_score"}, "[DATAREADER] TODO");
 
   // Input options
   arg_parser.add_option(ABSOLUTE_SAMPLE_COUNT,
                         {"--absolute_sample_count"},
                         "[DATAREADER] TODO",
                         -1);
-  arg_parser.add_option(DATA_FILEDIR,
-                        {"--data_filedir"},
-                        "[DATAREADER] Sets the file direcotry for train and test data",
-                        "");
+  arg_parser.add_option(
+    DATA_FILEDIR,
+    {"--data_filedir"},
+    "[DATAREADER] Sets the file directory for train and test data",
+    "");
   arg_parser.add_option(DATA_FILEDIR_TEST,
                         {"--data_filedir_test"},
                         "[DATAREADER] TODO",
@@ -340,10 +332,7 @@ void construct_datareader_options() {
                         {"--data_reader_percent"},
                         "[DATAREADER] TODO",
                         (float)-1);
-  arg_parser.add_option(DELIMITER,
-                        {"--delimiter"},
-                        "[DATAREADER] TODO",
-                        "");
+  arg_parser.add_option(DELIMITER, {"--delimiter"}, "[DATAREADER] TODO", "");
   arg_parser.add_option(IMAGE_SIZES_FILENAME,
                         {"--image_sizes_filename"},
                         "[DATAREADER] TODO",
@@ -364,14 +353,8 @@ void construct_datareader_options() {
                         {"--normalization"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(N_LINES,
-                        {"--n_lines"},
-                        "[DATAREADER] TODO",
-                        -1);
-  arg_parser.add_option(PAD_INDEX,
-                        {"--pad_index"},
-                        "[DATAREADER] TODO",
-                        -1);
+  arg_parser.add_option(N_LINES, {"--n_lines"}, "[DATAREADER] TODO", -1);
+  arg_parser.add_option(PAD_INDEX, {"--pad_index"}, "[DATAREADER] TODO", -1);
   arg_parser.add_option(PILOT2_READ_FILE_SIZES,
                         {"--pilot2_read_file_sizes"},
                         "[DATAREADER] TODO",
@@ -401,61 +384,31 @@ void construct_datareader_options() {
                         utils::ENV("LBANN_SMILES_BUFFER_SIZE"),
                         "[DATAREADER] Size of the read buffer for the SMILES "
                         "data reader.",
-                        16*1024*1024UL);
+                        16 * 1024 * 1024UL);
   arg_parser.add_option(TEST_TARBALL,
                         {"--test_tarball"},
                         "[DATAREADER] TODO",
                         -1);
-  arg_parser.add_option(VOCAB,
-                        {"--vocab"},
-                        "[DATAREADER] TODO",
-                        "");
+  arg_parser.add_option(VOCAB, {"--vocab"}, "[DATAREADER] TODO", "");
 }
 
-void construct_jag_options() {
+void construct_jag_options()
+{
   auto& arg_parser = global_argument_parser();
 
   // Bool flags
-  arg_parser.add_flag(JAG,
-                      {"--jag"},
-                      "[JAG] TODO");
-  arg_parser.add_flag(JAG_PARTITIONED,
-                      {"--jag_partitioned"},
-                      "[JAG] TODO");
+  arg_parser.add_flag(JAG, {"--jag"}, "[JAG] TODO");
+  arg_parser.add_flag(JAG_PARTITIONED, {"--jag_partitioned"}, "[JAG] TODO");
 
   // Input options
-  arg_parser.add_option(BASE_DIR,
-                        {"--base_dir"},
-                        "[JAG] TODO",
-                        "");
-  arg_parser.add_option(FILELIST,
-                        {"--filelist"},
-                        "[JAG] TODO",
-                        "");
-  arg_parser.add_option(FILENAME,
-                        {"--filename"},
-                        "[JAG] TODO",
-                        "");
-  arg_parser.add_option(FORMAT,
-                        {"--format"},
-                        "[JAG] TODO",
-                        "");
-  arg_parser.add_option(INDEX_FN,
-                        {"--index_fn"},
-                        "[JAG] TODO",
-                        "");
-  arg_parser.add_option(MAPPING_FN,
-                        {"--mapping_fn"},
-                        "[JAG] TODO",
-                        "");
-  arg_parser.add_option(NUM_LISTS,
-                        {"--num_lists"},
-                        "[JAG] TODO",
-                        -1);
-  arg_parser.add_option(NUM_SAMPLES,
-                        {"--num_samples"},
-                        "[JAG] TODO",
-                        -1);
+  arg_parser.add_option(BASE_DIR, {"--base_dir"}, "[JAG] TODO", "");
+  arg_parser.add_option(FILELIST, {"--filelist"}, "[JAG] TODO", "");
+  arg_parser.add_option(FILENAME, {"--filename"}, "[JAG] TODO", "");
+  arg_parser.add_option(FORMAT, {"--format"}, "[JAG] TODO", "");
+  arg_parser.add_option(INDEX_FN, {"--index_fn"}, "[JAG] TODO", "");
+  arg_parser.add_option(MAPPING_FN, {"--mapping_fn"}, "[JAG] TODO", "");
+  arg_parser.add_option(NUM_LISTS, {"--num_lists"}, "[JAG] TODO", -1);
+  arg_parser.add_option(NUM_SAMPLES, {"--num_samples"}, "[JAG] TODO", -1);
   arg_parser.add_option(NUM_SAMPLES_PER_FILE,
                         {"--num_samples_per_file"},
                         "[JAG] TODO",
@@ -464,33 +417,22 @@ void construct_jag_options() {
                         {"--num_samples_per_list"},
                         "[JAG] TODO",
                         -1);
-  arg_parser.add_option(NUM_SUBDIRS,
-                        {"--num_subdirs"},
-                        "[JAG] TODO",
-                        -1);
+  arg_parser.add_option(NUM_SUBDIRS, {"--num_subdirs"}, "[JAG] TODO", -1);
   arg_parser.add_option(OUTPUT_BASE_DIR,
                         {"--output_base_dir"},
                         "[JAG] TODO",
                         "");
-  arg_parser.add_option(OUTPUT_BASE_FN,
-                        {"--output_base_fn"},
-                        "[JAG] TODO",
-                        "");
-  arg_parser.add_option(OUTPUT_DIR,
-                        {"--output_dir"},
-                        "[JAG] TODO",
-                        "");
-  arg_parser.add_option(OUTPUT_FN,
-                        {"--output_fn"},
-                        "[JAG] TODO",
-                        "");
+  arg_parser.add_option(OUTPUT_BASE_FN, {"--output_base_fn"}, "[JAG] TODO", "");
+  arg_parser.add_option(OUTPUT_DIR, {"--output_dir"}, "[JAG] TODO", "");
+  arg_parser.add_option(OUTPUT_FN, {"--output_fn"}, "[JAG] TODO", "");
   arg_parser.add_option(SAMPLES_PER_FILE,
                         {"--samples_per_file"},
                         "[JAG] TODO",
                         -1);
 }
 
-void construct_all_options() {
+void construct_all_options()
+{
   construct_std_options();
   construct_datastore_options();
   construct_datareader_options();
diff --git a/src/utils/protobuf_utils.cpp b/src/utils/protobuf_utils.cpp
index 2385404488d..fe45761562a 100644
--- a/src/utils/protobuf_utils.cpp
+++ b/src/utils/protobuf_utils.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -49,14 +49,15 @@ parse_prototext_filenames_from_command_line(
   std::vector<std::string> data_set_metadata;
   bool single_file_load = false;
 
-  std::string params[] = { PROTOTEXT, MODEL, READER, METADATA, OPTIMIZER };
-  for(auto & which : params) {
+  std::string params[] = {PROTOTEXT, MODEL, READER, METADATA, OPTIMIZER};
+  for (auto& which : params) {
     std::string fn = arg_parser.get<std::string>(which);
     if (fn != "") {
       size_t t_pos = fn.find("trainer");
       if(t_pos != std::string::npos) {
-        //append appropriate trainer id to prototext filename
-        std::string fname = fn.substr(0,t_pos+7)+ std::to_string(trainer_rank);
+        // append appropriate trainer id to prototext filename
+        std::string fname =
+          fn.substr(0, t_pos + 7) + std::to_string(trainer_rank);
         fn = fname;
       }
       if (which == PROTOTEXT) {
@@ -82,18 +83,26 @@ parse_prototext_filenames_from_command_line(
     size_t n = models.size();
     if (! (optimizers.size() == 1 || optimizers.size() == n)) {
       LBANN_ERROR(
-        "you specified ", n, " model filenames, and ", optimizers.size(), 
+        "you specified ",
+        n,
+        " model filenames, and ",
+        optimizers.size(),
         " optimizer filenames; you must specify 1 optimizer filenames");
     }
     if (! (readers.size() == 1 || readers.size() == n)) {
-      LBANN_ERROR(
-        "you specified ", n, " model filenames, and ", readers.size(),
-        " reader filenames; you must specify 1 reader filenames");
+      LBANN_ERROR("you specified ",
+                  n,
+                  " model filenames, and ",
+                  readers.size(),
+                  " reader filenames; you must specify 1 reader filenames");
     }
     if (! (data_set_metadata.size() == 0 || data_set_metadata.size() == 1 || data_set_metadata.size() == n)) {
-      LBANN_ERROR(
-        "you specified ", n, " model filenames, and ", data_set_metadata.size(),
-        " data set metadata filenames; you must specify 1 data set metadata filenames");
+      LBANN_ERROR("you specified ",
+                  n,
+                  " model filenames, and ",
+                  data_set_metadata.size(),
+                  " data set metadata filenames; you must specify 1 data set "
+                  "metadata filenames");
     }
   }
 
@@ -162,7 +171,8 @@ load_prototext(
   const bool master,
   const int trainer_rank)
 {
-  auto names = parse_prototext_filenames_from_command_line(master, trainer_rank);
+  auto names =
+    parse_prototext_filenames_from_command_line(master, trainer_rank);
   auto models_out = read_in_prototext_files(master, names);
   if (models_out.size() == 0 && master) {
     LBANN_ERROR("Failed to load any prototext files");
diff --git a/src/utils/stack_profiler.cpp b/src/utils/stack_profiler.cpp
index f508ee02df3..fe1ca19e859 100644
--- a/src/utils/stack_profiler.cpp
+++ b/src/utils/stack_profiler.cpp
@@ -95,7 +95,8 @@ void stack_profiler::activate(int thread) {
         c_hash_fp_full_stack_trace_metadata = fopen("full_stack_trace.txt", "w");
       }
     }
-  } else {
+  }
+  else {
     c_hash_profiling_is_turned_on = 0;
   }
 }
diff --git a/tests/test_shuffled_indices.cpp b/tests/test_shuffled_indices.cpp
index d9cc88d1a7f..fa79274ab8c 100644
--- a/tests/test_shuffled_indices.cpp
+++ b/tests/test_shuffled_indices.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -52,9 +52,7 @@ int main(int argc, char *argv[]) {
     // Initialize options db (this parses the command line)
     auto& arg_parser = global_argument_parser();
     construct_all_options();
-		arg_parser.add_flag("fn",
-												{"--fn"},
-												"TODO");
+    arg_parser.add_flag("fn", {"--fn"}, "TODO");
     arg_parser.parse(argc, argv);
 
     if (arg_parser.help_requested() or argc == 1) {
diff --git a/tests/test_stack_tracing/test_sigint_tracing.cpp b/tests/test_stack_tracing/test_sigint_tracing.cpp
index aa9fef32736..8e63788fb3e 100644
--- a/tests/test_stack_tracing/test_sigint_tracing.cpp
+++ b/tests/test_stack_tracing/test_sigint_tracing.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -70,9 +70,7 @@ int main(int argc, char *argv[]) {
   try {
     auto& arg_parser = global_argument_parser();
     construct_std_options();
-    arg_parser.add_flag("catch signals",
-                        {"--catch-signals"},
-                        "TODO");
+    arg_parser.add_flag("catch signals", {"--catch-signals"}, "TODO");
     arg_parser.parse(argc, argv);
 
     //must be called after opts->init(); must also specify "--catch-signals"
diff --git a/tests/test_stack_tracing/test_sigsev_tracing.cpp b/tests/test_stack_tracing/test_sigsev_tracing.cpp
index 2a7b9cef393..e44d73b16d3 100644
--- a/tests/test_stack_tracing/test_sigsev_tracing.cpp
+++ b/tests/test_stack_tracing/test_sigsev_tracing.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -70,9 +70,7 @@ int main(int argc, char *argv[]) {
   try {
     auto& arg_parser = global_argument_parser();
     construct_std_options();
-    arg_parser.add_flag("catch signals",
-                        {"--catch-signals"},
-                        "TODO");
+    arg_parser.add_flag("catch signals", {"--catch-signals"}, "TODO");
     arg_parser.parse(argc, argv);
 
     //must be called after opts->init(); must also specify "--catch-signals"
diff --git a/unit_test/MPICatchMain.cpp b/unit_test/MPICatchMain.cpp
index 19144ff44a5..6ad653093a4 100644
--- a/unit_test/MPICatchMain.cpp
+++ b/unit_test/MPICatchMain.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
diff --git a/unit_test/SequentialCatchMain.cpp b/unit_test/SequentialCatchMain.cpp
index 82eb00dbce6..d8130f97d23 100644
--- a/unit_test/SequentialCatchMain.cpp
+++ b/unit_test/SequentialCatchMain.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -27,8 +27,8 @@
 #define CATCH_CONFIG_RUNNER
 #include <catch2/catch.hpp>
 #include <lbann/utils/dnn_lib/helpers.hpp>
-#include <lbann/utils/random_number_generators.hpp>
 #include <lbann/utils/options.hpp>
+#include <lbann/utils/random_number_generators.hpp>
 
 int main(int argc, char* argv[]) {
 #ifdef LBANN_HAS_DNN_LIB

From be4ddb9bf73c06445793339fbd9c22dbae37faa9 Mon Sep 17 00:00:00 2001
From: Tom Benson <30674819+benson31@users.noreply.github.com>
Date: Mon, 20 Sep 2021 12:01:31 -0400
Subject: [PATCH 04/37] Add binary-with-constant layers to the Python Front-End
 (#1965)

* add binary-with-constant operators to the PFE

* Update release notes
---
 ReleaseNotes.txt               |   1 +
 python/lbann/core/operators.py | 110 +++++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+)

diff --git a/ReleaseNotes.txt b/ReleaseNotes.txt
index 42445ec0224..f0c3619bc0b 100644
--- a/ReleaseNotes.txt
+++ b/ReleaseNotes.txt
@@ -18,6 +18,7 @@ Support for new network structures:
 Support for new layers:
 - Added support for 2D Matrices for Scatter and Gather layers
 - Added distributed tensor parallelism with channelwise decomposition for channelwise fully connected layer
+- Added "binary-with-constant" operators
 
 Python front-end:
 
diff --git a/python/lbann/core/operators.py b/python/lbann/core/operators.py
index e21f92c23eb..ca2bc90bf87 100644
--- a/python/lbann/core/operators.py
+++ b/python/lbann/core/operators.py
@@ -101,6 +101,17 @@ def do_export_proto(self):
         params = OpProto.AddOperator()
         return params
 
+class AddConstant(Operator):
+    """Add a constant to each input value (x+c)."""
+    def __init__(self, constant: float = 0.0, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.constant = constant
+
+    def do_export_proto(self):
+        params = OpProto.AddConstantOperator()
+        params.constant = self.constant
+        return params
+
 class Asin(Operator):
     """Apply the Asin operator entrywise."""
     def __init__(self, *args, **kwargs):
@@ -182,6 +193,17 @@ def do_export_proto(self):
         params = OpProto.CeilOperator()
         return params
 
+class ConstantSubtract(Operator):
+    """Subtract each input value from a constant (c-x)."""
+    def __init__(self, constant: float = 0.0, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.constant = constant
+
+    def do_export_proto(self):
+        params = OpProto.ConstantSubtractOperator()
+        params.constant = self.constant
+        return params
+
 class Cos(Operator):
     """Apply the Cos operator entrywise."""
     def __init__(self, *args, **kwargs):
@@ -218,6 +240,17 @@ def do_export_proto(self):
         params = OpProto.EqualOperator()
         return params
 
+class EqualConstant(Operator):
+    """Test each value for equality with a constant (x==c)."""
+    def __init__(self, constant: float = 0.0, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.constant = constant
+
+    def do_export_proto(self):
+        params = OpProto.EqualConstantOperator()
+        params.constant = self.constant
+        return params
+
 class Erf(Operator):
     """Apply the Erf operator entrywise."""
     def __init__(self, *args, **kwargs):
@@ -272,6 +305,17 @@ def do_export_proto(self):
         params = OpProto.GreaterOperator()
         return params
 
+class GreaterConstant(Operator):
+    """Test each value for "greater-than" with a constant (x>c)."""
+    def __init__(self, constant: float = 0.0, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.constant = constant
+
+    def do_export_proto(self):
+        params = OpProto.GreaterConstantOperator()
+        params.constant = self.constant
+        return params
+
 class GreaterEqual(Operator):
     """Apply the GreaterEqual operator entrywise."""
     def __init__(self, *args, **kwargs):
@@ -281,6 +325,17 @@ def do_export_proto(self):
         params = OpProto.GreaterEqualOperator()
         return params
 
+class GreaterEqualConstant(Operator):
+    """Test each value for "greater-than-or-equal-to" with a constant (x>=c)."""
+    def __init__(self, constant: float = 0.0, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.constant = constant
+
+    def do_export_proto(self):
+        params = OpProto.GreaterEqualConstantOperator()
+        params.constant = self.constant
+        return params
+
 class Less(Operator):
     """Apply the Less operator entrywise."""
     def __init__(self, *args, **kwargs):
@@ -290,6 +345,17 @@ def do_export_proto(self):
         params = OpProto.LessOperator()
         return params
 
+class LessConstant(Operator):
+    """Test each value for "less-than" with a constant (x<c)."""
+    def __init__(self, constant: float = 0.0, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.constant = constant
+
+    def do_export_proto(self):
+        params = OpProto.LessConstantOperator()
+        params.constant = self.constant
+        return params
+
 class LessEqual(Operator):
     """Apply the LessEqual operator entrywise."""
     def __init__(self, *args, **kwargs):
@@ -299,6 +365,17 @@ def do_export_proto(self):
         params = OpProto.LessEqualOperator()
         return params
 
+class LessEqualConstant(Operator):
+    """Test each value for "less-than-or-equal-to" with a constant (x<=c)."""
+    def __init__(self, constant: float = 0.0, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.constant = constant
+
+    def do_export_proto(self):
+        params = OpProto.LessEqualConstantOperator()
+        params.constant = self.constant
+        return params
+
 class Log(Operator):
     """Apply the Log operator entrywise."""
     def __init__(self, *args, **kwargs):
@@ -416,6 +493,17 @@ def do_export_proto(self):
         params = OpProto.NotEqualOperator()
         return params
 
+class NotEqualConstant(Operator):
+    """Test each value for inequality with a constant (x!=c)."""
+    def __init__(self, constant: float = 0.0, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.constant = constant
+
+    def do_export_proto(self):
+        params = OpProto.NotEqualConstantOperator()
+        params.constant = self.constant
+        return params
+
 class Pow(Operator):
     """Apply the Pow operator entrywise."""
     def __init__(self, *args, **kwargs):
@@ -470,6 +558,17 @@ def do_export_proto(self):
         params = OpProto.SafeReciprocalOperator()
         return params
 
+class Scale(Operator):
+    """Scale each input value by a constant value (c*x)."""
+    def __init__(self, constant: float = 0.0, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.constant = constant
+
+    def do_export_proto(self):
+        params = OpProto.ScaleOperator()
+        params.constant = self.constant
+        return params
+
 class Selu(Operator):
     """Apply the Selu operator entrywise."""
     def __init__(self, *args, **kwargs):
@@ -578,6 +677,17 @@ def do_export_proto(self):
         params = OpProto.SubtractOperator()
         return params
 
+class SubtractConstant(Operator):
+    """Subtract a constant from each input value (x-c)."""
+    def __init__(self, constant: float = 0.0, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.constant = constant
+
+    def do_export_proto(self):
+        params = OpProto.SubtractConstantOperator()
+        params.constant = self.constant
+        return params
+
 class Tan(Operator):
     """Apply the Tan operator entrywise."""
     def __init__(self, *args, **kwargs):

From 92b77125baa860861d9d87ca2e1202a3e60838f3 Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Mon, 20 Sep 2021 10:57:22 -0700
Subject: [PATCH 05/37] Generate Python layer class for each operator class
 (#1968)

---
 .../test_unit_module_fully_connected.py       | 183 ++++++++++++++++++
 python/lbann/__init__.py                      |   9 +-
 python/lbann/core/operator_layers.py          |  38 +++-
 3 files changed, 218 insertions(+), 12 deletions(-)
 create mode 100644 bamboo/unit_tests/test_unit_module_fully_connected.py

diff --git a/bamboo/unit_tests/test_unit_module_fully_connected.py b/bamboo/unit_tests/test_unit_module_fully_connected.py
new file mode 100644
index 00000000000..9a13638116b
--- /dev/null
+++ b/bamboo/unit_tests/test_unit_module_fully_connected.py
@@ -0,0 +1,183 @@
+import functools
+import operator
+import os
+import os.path
+import sys
+import numpy as np
+
+# Bamboo utilities
+current_file = os.path.realpath(__file__)
+current_dir = os.path.dirname(current_file)
+sys.path.insert(0, os.path.join(os.path.dirname(current_dir), 'common_python'))
+import tools
+
+# ==============================================
+# Objects for Python data reader
+# ==============================================
+# Note: The Python data reader imports this file as a module and calls
+# the functions below to ingest data.
+
+# Data
+np.random.seed(20210917)
+_num_samples = 3
+_input_size = 5
+_samples = np.random.normal(size=(_num_samples,2,_input_size)).astype(np.float32)
+
+# Sample access functions
+def get_sample(index):
+    return _samples[index].flatten()
+def num_samples():
+    return _num_samples
+def sample_dims():
+    return (2*_input_size,)
+
+# ==============================================
+# Setup LBANN experiment
+# ==============================================
+
+def setup_experiment(lbann):
+    """Construct LBANN experiment.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+    mini_batch_size = num_samples()
+    trainer = lbann.Trainer(mini_batch_size)
+    model = construct_model(lbann)
+    data_reader = construct_data_reader(lbann)
+    optimizer = lbann.NoOptimizer()
+    return trainer, model, data_reader, optimizer
+
+def construct_model(lbann):
+    """Construct LBANN model.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+
+    # ------------------------------------------
+    # NumPy implementation
+    # ------------------------------------------
+
+    w_np = np.random.normal(size=(1,_input_size)).astype(np.float32)
+    b_np = np.random.normal(size=(1,1)).astype(np.float32)
+    w = w_np.astype(np.float64)
+    b = b_np.astype(np.float64)
+    x0 = _samples[:,0,:].astype(np.float64)
+    x1 = _samples[:,1,:].astype(np.float64)
+    y0 = np.tanh(np.matmul(x0, w.transpose()) + b)
+    y1 = np.tanh(np.matmul(x1, w.transpose()) + b)
+    y0_np = np.mean(y0)
+    y1_np = np.mean(y1)
+
+    # ------------------------------------------
+    # LBANN implementation
+    # ------------------------------------------
+
+    # Objects for LBANN model
+    metrics = []
+    callbacks = []
+
+    # Input data
+    x = lbann.Slice(
+        lbann.Input(data_field='samples'),
+        slice_points=tools.str_list([0, _input_size, _input_size*2]),
+    )
+    x0 = lbann.Identity(x)
+    x1 = lbann.Identity(x)
+
+    # Fully-connected module
+    import lbann.modules
+    fc = lbann.modules.FullyConnectedModule(
+        1,
+        bias=True,
+        weights=[
+            lbann.Weights(
+                initializer=lbann.ValueInitializer(
+                    values=tools.str_list(np.nditer(w_np)))),
+            lbann.Weights(
+                initializer=lbann.ValueInitializer(
+                    values=tools.str_list(np.nditer(b_np)))),
+        ],
+        activation=lbann.Tanh,
+    )
+
+    # y1: compare against the NumPy reference, tolerance scaled by y1_np
+    y1 = fc(x1)
+    tol = abs(8 * y1_np * np.finfo(np.float32).eps)
+    metrics.append(lbann.Metric(y1, name='y1'))
+    callbacks.append(lbann.CallbackCheckMetric(
+        metric=metrics[-1].name,
+        lower_bound=y1_np-tol,
+        upper_bound=y1_np+tol,
+        error_on_failure=True,
+        execution_modes='test'))
+
+    # y0: compare against the NumPy reference, tolerance scaled by y0_np
+    y0 = fc(x0)
+    tol = abs(8 * y0_np * np.finfo(np.float32).eps)
+    metrics.append(lbann.Metric(y0, name='y0'))
+    callbacks.append(lbann.CallbackCheckMetric(
+        metric=metrics[-1].name,
+        lower_bound=y0_np-tol,
+        upper_bound=y0_np+tol,
+        error_on_failure=True,
+        execution_modes='test'))
+
+    # ------------------------------------------
+    # Construct model
+    # ------------------------------------------
+
+    num_epochs = 0
+    return lbann.Model(num_epochs,
+                       layers=lbann.traverse_layer_graph(x),
+                       metrics=metrics,
+                       callbacks=callbacks)
+
+def construct_data_reader(lbann):
+    """Construct Protobuf message for Python data reader.
+
+    The Python data reader will import the current Python file to
+    access the sample access functions.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+
+    # Note: The training data reader should be removed when
+    # https://github.com/LLNL/lbann/issues/1098 is resolved.
+    message = lbann.reader_pb2.DataReader()
+    message.reader.extend([
+        tools.create_python_data_reader(
+            lbann,
+            current_file,
+            'get_sample',
+            'num_samples',
+            'sample_dims',
+            'train'
+        )
+    ])
+    message.reader.extend([
+        tools.create_python_data_reader(
+            lbann,
+            current_file,
+            'get_sample',
+            'num_samples',
+            'sample_dims',
+            'test'
+        )
+    ])
+    return message
+
+# ==============================================
+# Setup PyTest
+# ==============================================
+
+# Create test functions that can interact with PyTest
+# Note: Create test name by removing ".py" from file name
+_test_name = os.path.splitext(os.path.basename(current_file))[0]
+for _test_func in tools.create_tests(setup_experiment, _test_name):
+    globals()[_test_func.__name__] = _test_func
diff --git a/python/lbann/__init__.py b/python/lbann/__init__.py
index 6f61b62a75b..9e6b19b34fe 100644
--- a/python/lbann/__init__.py
+++ b/python/lbann/__init__.py
@@ -9,8 +9,13 @@
     raise ImportError('Python 3 is required')
 
 # Try getting build-specific paths from config file
-_config_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                            'python_config.ini')
+if 'LBANN_PYTHON_CONFIG_FILE' in os.environ:
+    _config_file = os.environ['LBANN_PYTHON_CONFIG_FILE']
+else:
+    _config_file = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        'python_config.ini',
+    )
 _lbann_exe = None
 _lbann_has_proto_definitions = False
 if os.path.isfile(_config_file):
diff --git a/python/lbann/core/operator_layers.py b/python/lbann/core/operator_layers.py
index a9bcab127fe..cfb56f2d0be 100644
--- a/python/lbann/core/operator_layers.py
+++ b/python/lbann/core/operator_layers.py
@@ -12,24 +12,42 @@
 import lbann.core.operators
 
 def generate_operator_layer(operator_class):
+    """Create operator layer class for a single operator
 
-    def create_layer(*args, **kwargs):
-        # Yeahhhh this seems like a GREAT idea... But it honestly
-        # seems dumber to copy the list from layers.py, thereby
-        # creating two glaring maintenance issues where there is
-        # currently only one.
-        layer_keys = lbann.Layer.__init__.__kwdefaults__.keys()
-        layer_kwargs = { k: v for k,v in kwargs.items() if k in layer_keys }
-        op_kwargs = { k: v for k,v in kwargs.items() if k not in layer_keys }
+    Returns a class that inherits from lbann.OperatorLayer.
 
+    Args:
+        operator_class (type): A derived class of
+            lbann.operators.Operator
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Operator layer with a single operator
+
+        Forwards arguments to lbann.OperatorLayer or sub-class of
+        lbann.Operator.
+
+        """
+        layer_kwargs = lbann.Layer.__init__.__kwdefaults__.copy()
+        op_kwargs = {}
+        for key, value in kwargs.items():
+            if key in layer_kwargs:
+                layer_kwargs[key] = value
+            else:
+                op_kwargs[key] = value
         layer_kwargs['ops'] = [ operator_class(**op_kwargs) ]
-        return OperatorLayer(*args, **layer_kwargs)
+        OperatorLayer.__init__(self, *args, **layer_kwargs)
 
-    return create_layer
+    # Return operator layer class
+    class_name = operator_class.__name__
+    class_dict = {'__init__': __init__}
+    return type(class_name, (OperatorLayer,), class_dict)
 
 def is_operator_class(obj):
     return inspect.isclass(obj) and issubclass(obj, lbann.core.operators.Operator) and obj is not lbann.core.operators.Operator
 
+# Generate operator layer classes based on operator classes
 ops_classes = inspect.getmembers(lbann.core.operators, is_operator_class)
 for op in ops_classes:
     op_name, op_class = op

From 95f433f90093ec4232afcdf8dbfdbd9f059c0e79 Mon Sep 17 00:00:00 2001
From: Tom Benson <30674819+benson31@users.noreply.github.com>
Date: Mon, 20 Sep 2021 13:59:08 -0400
Subject: [PATCH 06/37] Fix an erroneous comment; this was leftover after some
 refactoring and missed in review. (#1971)

---
 include/lbann/execution_algorithms/factory.hpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/include/lbann/execution_algorithms/factory.hpp b/include/lbann/execution_algorithms/factory.hpp
index 6f64f01ee29..f2bd8720874 100644
--- a/include/lbann/execution_algorithms/factory.hpp
+++ b/include/lbann/execution_algorithms/factory.hpp
@@ -69,11 +69,9 @@ void register_new_training_algorithm(TrainingAlgorithmKey key,
 
 } // namespace lbann
 
-/** @brief Get the factory for a given key from the default factory
- *         factory.
- *  @param[in] key The identifier for the training algorithm.
- *  @return The abstract factory that can build components of the
- *          requested training algorithm.
+/** @brief Create a new training_algorithm instance.
+ *  @param[in] params A protobuf message describing the algorithm.
+ *  @return A newly-constructed training algorithm.
  */
 template <>
 std::unique_ptr<lbann::training_algorithm>

From b53eb01a629e9055c4b917ab636faeed21d3aa1e Mon Sep 17 00:00:00 2001
From: Michael Wyatt <wyatt5@llnl.gov>
Date: Mon, 20 Sep 2021 11:00:04 -0700
Subject: [PATCH 07/37] added environment variables to description string
 (#1969)

---
 include/lbann/utils/argument_parser.hpp | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/include/lbann/utils/argument_parser.hpp b/include/lbann/utils/argument_parser.hpp
index 256292917e9..d5b2ddf5d54 100644
--- a/include/lbann/utils/argument_parser.hpp
+++ b/include/lbann/utils/argument_parser.hpp
@@ -299,9 +299,14 @@ class argument_parser : ErrorHandler
            std::string const& description)
   {
     if (env.exists() && env.template value<bool>())
-      return add_flag_impl_(name, std::move(cli_flags), description, true);
+      return add_flag_impl_(name,
+                            std::move(cli_flags),
+                            description + "\nENV: {" + env.name() + "}",
+                            true);
     else
-      return add_flag(name, std::move(cli_flags), description);
+      return add_flag(name,
+                      std::move(cli_flags),
+                      description + "\nENV: {" + env.name() + "}");
   }
 
   /** @brief Add an additional named option.
@@ -368,10 +373,14 @@ class argument_parser : ErrorHandler
              T default_value = T())
   {
     if (env.exists())
-      return add_option(name, std::move(cli_flags), description,
+      return add_option(name,
+                        std::move(cli_flags),
+                        description + "\nENV: {" + env.name() + "}",
                         env.template value<T>());
     else
-      return add_option(name, std::move(cli_flags), description,
+      return add_option(name,
+                        std::move(cli_flags),
+                        description + "\nENV: {" + env.name() + "}",
                         std::move(default_value));
   }
 
@@ -429,8 +438,11 @@ class argument_parser : ErrorHandler
              std::string const& description,
              char const* default_value)
   {
-    return add_option(name, cli_flags, std::move(env),
-                      description, std::string(default_value));
+    return add_option(name,
+                      cli_flags,
+                      std::move(env),
+                      description + "\nENV: {" + env.name() + "}",
+                      std::string(default_value));
   }
 
   /** @brief Add an optional positional argument.

From c7c8e181ee59f5b79fedaeb44cd631049259f7ba Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Mon, 20 Sep 2021 15:32:17 -0700
Subject: [PATCH 08/37] Always mix MPI rank within trainer into RNG seeds
 (#1962)

---
 .../lbann/utils/random_number_generators.hpp  |  5 +-
 src/utils/lbann_library.cpp                   |  7 --
 src/utils/random_number_generators.cpp        | 68 ++++++++-----------
 3 files changed, 30 insertions(+), 50 deletions(-)

diff --git a/include/lbann/utils/random_number_generators.hpp b/include/lbann/utils/random_number_generators.hpp
index c571d203b61..18cc01e662a 100644
--- a/include/lbann/utils/random_number_generators.hpp
+++ b/include/lbann/utils/random_number_generators.hpp
@@ -125,8 +125,9 @@ fast_rng_gen& get_fast_io_generator();
 /** @brief Initialize the random number generator (with optional seed).
  *
  *  @param seed Seed value for the random number generator
- *  @param comm If present, mixes the process's rank within the model
- *              into the seed; if not, uses the MPI world rank.
+ *  @param comm If present, mixes the process's rank within the
+ *              trainer into the seed; if not, uses the MPI world
+ *              rank.
  *
  */
 void init_random(int seed = -1, int num_io_RNGs = 1, lbann_comm *comm = nullptr);
diff --git a/src/utils/lbann_library.cpp b/src/utils/lbann_library.cpp
index 222e843f810..3854e29cf28 100644
--- a/src/utils/lbann_library.cpp
+++ b/src/utils/lbann_library.cpp
@@ -227,15 +227,8 @@ trainer& construct_trainer(lbann_comm* comm,
 #ifndef LBANN_DETERMINISTIC
   if (!pb_trainer->random_init_trainers_identically()) {
     random_seed = hash_combine(random_seed, comm->get_trainer_rank());
-    // Also update the data sequence random seed
     data_seq_random_seed = random_seed;
   }
-
-  // Under normal conditions, reinitialize the random number generator so
-  // that regularization techniques (e.g. dropout) generate unique patterns
-  // on different ranks.
-  // At this point the data sequence random seed is no longer updated
-  random_seed = hash_combine(random_seed, comm->get_rank_in_world());
 #else
   if(comm->am_world_master()) {
     std::cout <<
diff --git a/src/utils/random_number_generators.cpp b/src/utils/random_number_generators.cpp
index d43ad6eda33..933dc8a8b65 100644
--- a/src/utils/random_number_generators.cpp
+++ b/src/utils/random_number_generators.cpp
@@ -130,52 +130,40 @@ fast_rng_gen& get_fast_io_generator() {
 void init_random(int seed, int num_io_RNGs, lbann_comm *comm) {
   generator_inited = true;
   fast_generator_inited = true;
-  if (seed != -1) {
-    // Seed every OpenMP thread, if present.
-    // Note: Threadprivate OMP variables don't work with dynamic threads.
-#ifdef _OPENMP
-    #pragma omp parallel
-    {
-      get_generator().seed(hash_combine(seed, omp_get_thread_num()));
-      get_fast_generator().seed(hash_combine(seed, omp_get_thread_num()));
-    }
-#else
-    get_generator().seed(seed);
-    get_fast_generator().seed(seed);
-#endif
 
-    // Set Elemental's RNG seed
-    auto elemental_seed = hash_combine(seed, 104729); // 10000th prime
-    int mpi_initialized = 0;
-    MPI_Initialized(&mpi_initialized);
-    if(mpi_initialized) {
-      // If MPI is initialized mix in the rank to ensure that Hydrogen
-      // has good RNGs.  Note that under some configurations LBANN
-      // will not do this, so it is good to ensure that Hydrogen is
-      // well seeded.
-      elemental_seed = (comm == nullptr
-                        ? hash_combine(elemental_seed, El::mpi::Rank(El::mpi::COMM_WORLD))
-                        : hash_combine(elemental_seed, comm->get_rank_in_trainer()));
-    }
-    El::Generator().seed(elemental_seed);
-  } else {
-    // Seed with a random value.
+  // Use different seed on each rank in trainer
+  if (seed == -1) {
     std::random_device rd;
-    unsigned rand_val = rd();
+    seed = rd();
+  }
+  else if (comm != nullptr) {
+    seed = hash_combine(seed, comm->get_rank_in_trainer());
+  }
+  else if (El::mpi::Initialized()) {
+    seed = hash_combine(seed, El::mpi::Rank(El::mpi::COMM_WORLD));
+  }
+
+  // Seed every OpenMP thread, if present.
+  // Note: Threadprivate OMP variables don't work with dynamic threads.
 #ifdef _OPENMP
-    #pragma omp parallel
-    {
-      get_generator().seed(hash_combine(rand_val, omp_get_thread_num()));
-      get_fast_generator().seed(hash_combine(rand_val, omp_get_thread_num()));
-    }
+  #pragma omp parallel
+  {
+    const int thread = omp_get_thread_num();
+    const int thread_seed = hash_combine(seed, thread);
+    get_generator().seed(thread_seed);
+    get_fast_generator().seed(hash_combine(thread_seed, 132241)); // 12345th prime
+  }
 #else
-    get_generator().seed(rand_val);
-    get_fast_generator().seed(rand_val);
+  get_generator().seed(seed);
+  get_fast_generator().seed(hash_combine(seed, 41263)); // 4321th prime
 #endif
-    El::Generator().seed(rand_val);
-  }
 
+  // Set Elemental's RNG seed
+  El::Generator().seed(hash_combine(seed, 104729)); // 10000th prime
+
+  // Initialize IO RNGs
   init_io_random(seed, num_io_RNGs);
+
 }
 
 void init_data_seq_random(int seed) {
@@ -193,8 +181,6 @@ void init_data_seq_random(int seed) {
 
 void init_ltfb_random(int seed) {
   if (seed == -1) {
-    // Seed with a random value.
-    std::random_device rd;
     seed = 20201003;
   }
 

From dc583f9cead336225a7567d057d8b1ff92b07637 Mon Sep 17 00:00:00 2001
From: Brian Van Essen <vanessen1@llnl.gov>
Date: Wed, 22 Sep 2021 09:53:03 -0700
Subject: [PATCH 09/37] Bugfix data reader catch2 tests (#1975)

* Fixed the initialization of the public API synthetic data reader test to match the new initialization

* Fix the initialization of the trainer in the model test
---
 .../data_reader_synthetic_test_public_api.cpp          | 10 +++++++---
 src/models/unit_test/model_test.cpp                    |  3 +++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp b/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
index f353ab28ad5..a080828211b 100644
--- a/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
+++ b/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
@@ -74,12 +74,16 @@ TEST_CASE("Synthetic data reader public API tests",
 {
   // initialize stuff (boilerplate)
   auto& comm = unit_test::utilities::current_world_comm();
-  lbann::init_random(42, 1);
-  lbann::init_data_seq_random(42);
+  int seed = 42;
+  lbann::init_random(seed, 1);
+  lbann::init_data_seq_random(seed);
 
   // Create a local copy of the RNG to check the synthetic data reader
   lbann::fast_rng_gen ref_fast_generator;
-  ref_fast_generator.seed(lbann::hash_combine(42, 0));
+  // Mix in the rank in trainer
+  seed = lbann::hash_combine(seed, comm.get_rank_in_trainer());
+  // Mix in the I/O thread rank
+  ref_fast_generator.seed(lbann::hash_combine(seed, 0));
 
   // Initalize a per-trainer I/O thread pool
   auto io_thread_pool = lbann::make_unique<lbann::thread_pool>();
diff --git a/src/models/unit_test/model_test.cpp b/src/models/unit_test/model_test.cpp
index 823abf81a4b..1924b09d0ec 100644
--- a/src/models/unit_test/model_test.cpp
+++ b/src/models/unit_test/model_test.cpp
@@ -35,6 +35,7 @@
 #include <lbann/layers/io/input_layer.hpp>
 #include <lbann/utils/memory.hpp>
 #include <lbann/utils/serialize.hpp>
+#include <lbann/utils/lbann_library.hpp>
 #include <lbann/proto/factories.hpp>
 
 #include <lbann.pb.h>
@@ -62,6 +63,8 @@ auto make_model(lbann::lbann_comm& comm)
   lbann_data::LbannPB my_proto;
   if (!pb::TextFormat::ParseFromString(model_prototext, &my_proto))
     throw "Parsing protobuf failed.";
+  // Construct a trainer so that the model can register the input layer
+  lbann::construct_trainer(&comm, my_proto.mutable_trainer(), my_proto);
   auto metadata = mock_datareader_metadata();
   auto my_model = lbann::proto::construct_model(&comm,
                                                 -1,

From bbd30fc03997dfe95c2a5ad79bf41a2c45c8f6a4 Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Wed, 22 Sep 2021 09:56:30 -0700
Subject: [PATCH 10/37] Fix incorrect sample dims in numpy_npz_conduit_reader
 (#1974)

Channel dimension was being excluded.
---
 src/data_readers/data_reader_numpy_npz_conduit.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/data_readers/data_reader_numpy_npz_conduit.cpp b/src/data_readers/data_reader_numpy_npz_conduit.cpp
index 5cdcc446bb3..279678cd1a0 100644
--- a/src/data_readers/data_reader_numpy_npz_conduit.cpp
+++ b/src/data_readers/data_reader_numpy_npz_conduit.cpp
@@ -389,7 +389,7 @@ void numpy_npz_conduit_reader::fill_in_metadata() {
   for (int k=1; k<shape_num_elts; k++) {
     m_data_dims.push_back(shape[k]);
   }
-  m_num_features = std::accumulate(m_data_dims.begin() + 1,
+  m_num_features = std::accumulate(m_data_dims.begin(),
                                    m_data_dims.end(),
                                    (unsigned) 1,
                                    std::multiplies<unsigned>());

From 2ac9c0224ca7f7819ed43d6fcd0129bd2f3c0669 Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Fri, 24 Sep 2021 11:42:07 -0700
Subject: [PATCH 11/37] Optionally use default-allocated GPU memory for
 long-lived buffers (#1970)

* Use default-allocated GPU memory for forward prop data tensors

* Use default-allocated GPU memory for weights, SGD, and Adam

* Hide reference to GPU enum in Adam for CPU builds

* Add option to allocate GPU activations with GPU memory pool

* Add option to allocate GPU weights with GPU memory pool

Also affects GPU SGD and Adam buffers
---
 include/lbann/utils/options.hpp   |  1 +
 src/layers/data_type_layer.cpp    | 20 ++++++++++
 src/layers/matrix_builder.hpp     |  2 +-
 src/optimizers/adam.cpp           | 12 ++++++
 src/optimizers/sgd.cpp            | 10 +++++
 src/utils/options.cpp             |  8 ++++
 src/weights/data_type_weights.cpp | 65 ++++++++++++++++++++-----------
 7 files changed, 94 insertions(+), 24 deletions(-)

diff --git a/include/lbann/utils/options.hpp b/include/lbann/utils/options.hpp
index 549f8c484cb..1b796c993e3 100644
--- a/include/lbann/utils/options.hpp
+++ b/include/lbann/utils/options.hpp
@@ -29,6 +29,7 @@ namespace lbann {
 #define USE_LTFB "ltfb"
 #define VERBOSE "verbose"
 #define WRITE_SAMPLE_LIST "write_sample_list"
+#define USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP "Use Hydrogen's default memory mode for GPU buffers in forward prop"
 
 // Input options
 #define CKPT_DIR "ckpt_dir"
diff --git a/src/layers/data_type_layer.cpp b/src/layers/data_type_layer.cpp
index 44a0a1b97ea..1721ff22a37 100644
--- a/src/layers/data_type_layer.cpp
+++ b/src/layers/data_type_layer.cpp
@@ -33,6 +33,8 @@
 #include "lbann/layers/data_type_layer.hpp"
 #include "lbann/models/model.hpp"
 #include "lbann/trainers/trainer.hpp"
+#include "lbann/utils/argument_parser.hpp"
+#include "lbann/utils/options.hpp"
 #include "lbann/utils/summary_impl.hpp"
 #include "lbann/utils/tensor_impl.hpp"
 
@@ -679,6 +681,24 @@ setup_matrices(const El::Grid& grid) {
     }
   }
 
+#ifdef LBANN_HAS_GPU
+  // Use directly-allocated GPU memory for forward prop matrices
+  // Note: GPU memory pool uses more memory and these buffers are
+  // rarely reallocated
+  /// @todo Consider using directly-allocated device memory when
+  /// training with persistent error signals
+  if (this->get_device_allocation() == El::Device::GPU) {
+    const auto& arg_parser = global_argument_parser();
+    if (!arg_parser.get<bool>(USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
+      for (auto& input : m_inputs) {
+        input->Matrix().SetMemoryMode(0); // Directly-allocated memory
+      }
+      for (auto& output : m_outputs) {
+        output->Matrix().SetMemoryMode(0); // Directly-allocated memory
+      }
+    }
+  }
+#endif // LBANN_HAS_GPU
 
 }
 
diff --git a/src/layers/matrix_builder.hpp b/src/layers/matrix_builder.hpp
index 089554f744f..907115cc397 100644
--- a/src/layers/matrix_builder.hpp
+++ b/src/layers/matrix_builder.hpp
@@ -73,7 +73,7 @@ class DefaultMemoryMatrixBuilder : public MatrixBuilder<T>
   // Pinned host memory; memory-pooled device memory
   static constexpr unsigned memory_mode_ = 1U;
 #elif defined(HYDROGEN_HAVE_GPU)
-  // Pinned host memory; default-allocated device memory
+  // Pinned host memory; directly-allocated device memory
   static constexpr unsigned memory_mode_ = (D == El::Device::CPU ? 1U : 0U);
 #else
   // Default memory
diff --git a/src/optimizers/adam.cpp b/src/optimizers/adam.cpp
index 3d8b38252e1..ca013be510d 100644
--- a/src/optimizers/adam.cpp
+++ b/src/optimizers/adam.cpp
@@ -26,8 +26,10 @@
 
 #include "lbann/optimizers/adam.hpp"
 #include "lbann/optimizers/adam_impl.hpp"
+#include "lbann/utils/argument_parser.hpp"
 #include "lbann/utils/exception.hpp"
 #include "lbann/utils/memory.hpp"
+#include "lbann/utils/options.hpp"
 
 namespace lbann {
 
@@ -107,6 +109,16 @@ void adam<TensorDataType>::setup(WeightsType* w) {
   const auto& gradient = this->get_gradient();
   m_moment1.reset(AbsDistMatrixType::Instantiate(gradient.DistData()));
   m_moment2.reset(AbsDistMatrixType::Instantiate(gradient.DistData()));
+#ifdef LBANN_HAS_GPU
+  if (m_moment1->GetLocalDevice() == El::Device::GPU
+      && m_moment2->GetLocalDevice() == El::Device::GPU) {
+    const auto& arg_parser = global_argument_parser();
+    if (!arg_parser.get<bool>(USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
+      m_moment1->Matrix().SetMemoryMode(0); // Directly-allocated memory
+      m_moment2->Matrix().SetMemoryMode(0); // Directly-allocated memory
+    }
+  }
+#endif // LBANN_HAS_GPU
   El::Zeros(*m_moment1, gradient.Height(), gradient.Width());
   El::Zeros(*m_moment2, gradient.Height(), gradient.Width());
 }
diff --git a/src/optimizers/sgd.cpp b/src/optimizers/sgd.cpp
index 3831ac4bf90..0da588b55d6 100644
--- a/src/optimizers/sgd.cpp
+++ b/src/optimizers/sgd.cpp
@@ -26,7 +26,9 @@
 
 #include "lbann/optimizers/sgd_impl.hpp"
 #include "lbann/utils/exception.hpp"
+#include "lbann/utils/argument_parser.hpp"
 #include "lbann/utils/memory.hpp"
+#include "lbann/utils/options.hpp"
 
 namespace lbann {
 
@@ -82,6 +84,14 @@ void sgd<TensorDataType>::setup(WeightsType* w) {
   OptimizerType::setup(w);
   const auto& gradient = this->get_gradient();
   m_velocity.reset(AbsDistMatrixType::Instantiate(gradient.DistData()));
+#ifdef LBANN_HAS_GPU
+  if (m_velocity->GetLocalDevice() == El::Device::GPU) {
+    const auto& arg_parser = global_argument_parser();
+    if (!arg_parser.get<bool>(USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
+      m_velocity->Matrix().SetMemoryMode(0); // Directly-allocated memory
+    }
+  }
+#endif // LBANN_HAS_GPU
   El::Zeros(*m_velocity, gradient.Height(), gradient.Width());
 }
 
diff --git a/src/utils/options.cpp b/src/utils/options.cpp
index b65e3c6a58e..8d7abf1ccb6 100644
--- a/src/utils/options.cpp
+++ b/src/utils/options.cpp
@@ -95,6 +95,14 @@ void construct_std_options()
                       {"--write_sample_list"},
                       "[STD] Writes out the sample list that was loaded into "
                       "the current directory");
+  arg_parser.add_flag(
+    USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP,
+    {"--use_gpu_default_memory_in_forward_prop"},
+    utils::ENV("LBANN_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP"),
+    "[STD] Use Hydrogen's default memory mode for GPU buffers in "
+    "forward prop (namely activations and weights). This will "
+    "typically use a GPU memory pool, which uses more memory than "
+    "directly allocating GPU memory.");
 
   // Input options
   arg_parser.add_option(
diff --git a/src/weights/data_type_weights.cpp b/src/weights/data_type_weights.cpp
index f64df6c574d..50b3cfc4c45 100644
--- a/src/weights/data_type_weights.cpp
+++ b/src/weights/data_type_weights.cpp
@@ -29,8 +29,10 @@
 #include "lbann/weights/data_type_weights.hpp"
 #include "lbann/weights/data_type_weights_impl.hpp"
 #include "lbann/optimizers/optimizer.hpp"
-#include "lbann/utils/exception.hpp"
 #include "lbann/io/file_io.hpp"
+#include "lbann/utils/argument_parser.hpp"
+#include "lbann/utils/exception.hpp"
+#include "lbann/utils/options.hpp"
 
 #include <layers.pb.h>
 
@@ -212,31 +214,48 @@ void data_type_weights<TensorDataType>::set_optimizer(
 template <typename TensorDataType>
 void data_type_weights<TensorDataType>::do_setup_() {
 
-  if (!m_values)
-  {
-    auto matrix_dist = this->get_matrix_distribution();
-    // Construct weights matrix
-    m_values.reset(AbsDistMatrixType::Instantiate(*matrix_dist.grid,
-                                                  matrix_dist.root,
-                                                  matrix_dist.colDist,
-                                                  matrix_dist.rowDist,
-                                                  (matrix_dist.blockHeight == 1
-                                                   && matrix_dist.blockWidth == 1 ?
-                                                   El::ELEMENT : El::BLOCK),
-                                                  matrix_dist.device));
-    m_values->AlignWith(matrix_dist);
-    m_values->Resize(this->get_matrix_height(), this->get_matrix_width());
-    if (m_initializer != nullptr) {
-      m_initializer->fill(*m_values);
-    } else {
-      El::Zero(*m_values);
-    }
+  // Return immediately if possible
+  if (m_values != nullptr) {
+    return;
+  }
 
-    // Setup optimizer
-    if (m_optimizer != nullptr) {
-      m_optimizer->setup(this);
+  // Construct matrix for weights values
+  auto matrix_dist = this->get_matrix_distribution();
+  m_values.reset(
+    AbsDistMatrixType::Instantiate(
+      *matrix_dist.grid,
+      matrix_dist.root,
+      matrix_dist.colDist,
+      matrix_dist.rowDist,
+      (matrix_dist.blockHeight == 1
+       && matrix_dist.blockWidth == 1 ?
+       El::ELEMENT : El::BLOCK),
+      matrix_dist.device));
+
+  // Allocate memory
+#ifdef LBANN_HAS_GPU
+  if (matrix_dist.device == El::Device::GPU) {
+    const auto& arg_parser = global_argument_parser();
+    if (!arg_parser.get<bool>(USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
+      m_values->Matrix().SetMemoryMode(0); // Directly-allocated memory
     }
   }
+#endif // LBANN_HAS_GPU
+  m_values->AlignWith(matrix_dist);
+  m_values->Resize(this->get_matrix_height(), this->get_matrix_width());
+
+  // Initialize values
+  if (m_initializer != nullptr) {
+    m_initializer->fill(*m_values);
+  } else {
+    El::Zero(*m_values);
+  }
+
+  // Setup optimizer
+  if (m_optimizer != nullptr) {
+    m_optimizer->setup(this);
+  }
+
 }
 
 // -----------------------------------------------

From 1859d778796cf5a711dc2e981c23796fe23440a6 Mon Sep 17 00:00:00 2001
From: Brian Van Essen <vanessen1@llnl.gov>
Date: Wed, 29 Sep 2021 10:06:53 -0700
Subject: [PATCH 12/37] Support for arbitrary data field names (#1967)

* Added support in the generic data reader and synthetic data reader
classes for arbitrary data fields.  Added catch2 testing for the
synthetic data reader to check that arbitrary fields are found and
that there is checking for valid field names.

* Added a test for the HDF5 data reader public API using HRRL data.

* Removed unused thread_buffer data structure from the generic data
reader.

* Adding a method to construct the linearized size lookup tables from a whitebox tester.

Added debugging messages.

* Changed the get_data function in the HDF5 data reader to avoid putting
the name of the conduit node into the search path for a given data
field within the node.

* Added data checking to the HDF5 HRRL public API tests.

* Add Catch2 file system tests

* Apply suggestions from code review

Co-authored-by: Tom Benson <30674819+benson31@users.noreply.github.com>

* Fixed the HDF5 data reader public API test to use the linearized size
function to support fetching multi-element data fields.

Removed some of the debugging code.

* Cleaning up and removing debugging code

* Moved HDF5 HRRL test samples into consolidated files.

* Added tests for throwing errors on invalid fields

* Removed dead code

* Added release notes

* Update include/lbann/data_readers/data_reader_synthetic.hpp

Co-authored-by: Tom Benson <30674819+benson31@users.noreply.github.com>

* Update src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp

Co-authored-by: Tom Benson <30674819+benson31@users.noreply.github.com>

* Fixed the I/O RNG in the synthetic public API test and the check in internal synthetic test.  Fixed compiler error from code review

Co-authored-by: Tom Benson <30674819+benson31@users.noreply.github.com>
---
 ReleaseNotes.txt                              |   2 +
 bamboo/unit_tests/test_catch2_unit_tests.py   |  16 +-
 .../buffered_data_coordinator.hpp             |   2 +-
 include/lbann/data_readers/data_reader.hpp    |  26 ++-
 .../lbann/data_readers/data_reader_HDF5.hpp   |  17 +-
 .../data_readers/data_reader_synthetic.hpp    |  14 ++
 src/data_readers/data_reader.cpp              |  20 +-
 src/data_readers/data_reader_HDF5.cpp         |  22 +-
 src/data_readers/data_reader_synthetic.cpp    |  22 ++
 src/data_readers/unit_test/CMakeLists.txt     |   1 +
 .../data_reader_HDF5_hrrl_data_test.cpp       | 112 +---------
 .../data_reader_HDF5_hrrl_public_api.cpp      | 195 +++++++++++++++++
 .../unit_test/data_reader_synthetic_test.cpp  |  62 ++++++
 .../data_reader_synthetic_test_public_api.cpp | 202 +++++++++++++++++-
 .../hdf5_hrrl_test_data_and_schema.yaml       |  84 ++++++++
 src/data_store/data_store_conduit.cpp         |   7 +-
 16 files changed, 660 insertions(+), 144 deletions(-)
 create mode 100644 src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp
 create mode 100644 src/data_readers/unit_test/test_data/hdf5_hrrl_test_data_and_schema.yaml

diff --git a/ReleaseNotes.txt b/ReleaseNotes.txt
index f0c3619bc0b..3fc55127ddb 100644
--- a/ReleaseNotes.txt
+++ b/ReleaseNotes.txt
@@ -50,6 +50,8 @@ I/O & data readers:
    take dynamic data fields rather than fixed fields.  Input buffers
    are no long allocated for fields that are not used in active
    models.
+ - Added support in the generic data reader and synthetic data reader
+   classes for arbitrary data fields.
 
 Build system:
 
diff --git a/bamboo/unit_tests/test_catch2_unit_tests.py b/bamboo/unit_tests/test_catch2_unit_tests.py
index 99f58af5b22..25e1b8b4d98 100644
--- a/bamboo/unit_tests/test_catch2_unit_tests.py
+++ b/bamboo/unit_tests/test_catch2_unit_tests.py
@@ -53,4 +53,18 @@ def test_run_parallel_catch_tests(cluster, dirname):
     mpi_catch_args = [mpi_catch_exe, '-r', 'junit', '-o', mpi_output_file]
     output = sp.run(mpi_launch + mpi_catch_args)
     tools.assert_success(output.returncode, mpi_output_file)
-    
+
+def test_run_parallel_filesystem_catch_tests(cluster, dirname):
+    output_dir = os.path.join(dirname, 'bamboo', 'unit_tests')
+    build_dir = hack_find_spack_build_dir(dirname)
+    mpi_catch_exe = os.path.join(build_dir, 'unit_test', 'mpi-catch-tests')
+    if not os.path.exists(mpi_catch_exe):
+        print('Skip - executable not found')
+        pytest.skip('executable not found')
+    # Run the parallel tests
+    mpi_launch = get_system_mpi_launch(cluster)
+    mpi_output_file_name = 'mpi_filesystem_catch_tests_output-%s-rank=%%r-size=%%s.xml' % (cluster)
+    mpi_output_file = os.path.join(output_dir, mpi_output_file_name)
+    mpi_catch_args = [mpi_catch_exe, '"[filesystem]"', '-r', 'junit', '-o', mpi_output_file]
+    output = sp.run(mpi_launch + mpi_catch_args)
+    tools.assert_success(output.returncode, mpi_output_file)
diff --git a/include/lbann/data_coordinator/buffered_data_coordinator.hpp b/include/lbann/data_coordinator/buffered_data_coordinator.hpp
index 8e867bd17fd..7e60bee89d5 100644
--- a/include/lbann/data_coordinator/buffered_data_coordinator.hpp
+++ b/include/lbann/data_coordinator/buffered_data_coordinator.hpp
@@ -170,7 +170,7 @@ class buffered_data_coordinator : public data_coordinator {
   /** @brief After a data field has been registered with the data
    *  coordinator setup its buffers. Note this can be called after
    *  each call to register_active_data_field. */
-  void setup_data_fields(int max_mini_batch_size);
+  void setup_data_fields(int max_mini_batch_size) override;
 
   /**
    * Map from execution context to the index of the active data buffer
diff --git a/include/lbann/data_readers/data_reader.hpp b/include/lbann/data_readers/data_reader.hpp
index 46d94643454..332f5753257 100644
--- a/include/lbann/data_readers/data_reader.hpp
+++ b/include/lbann/data_readers/data_reader.hpp
@@ -330,6 +330,12 @@ class generic_data_reader {
     return has_data_field(INPUT_DATA_TYPE_RESPONSES);
   }
 
+  /// Set whether or not a data reader has the given data field
+  void set_has_data_field(data_field_type const data_field, const bool b)
+  {
+    m_supported_input_types[data_field] = b;
+  }
+
   /// Whether or not a data reader has labels
   virtual void set_has_labels(const bool b)
   {
@@ -765,6 +771,24 @@ class generic_data_reader {
                    El::Int mb_size,
                    El::Matrix<El::Int>& indices_fetched);
 
+  /** @brief Called by fetch_data, fetch_label, fetch_response
+   *
+   * Fetch data from a single data field into a matrix.
+   * @param data_field The name of the data field.  May be one of the commonly
+   *        used (samples, labels, responses) or any data_field that exists
+   *        within an HDF5 experiment schema, Python DR schema, or synthetic
+   *        data reader
+   * @param Y The matrix to load data into.
+   * @param data_id The index of the datum to fetch.
+   * @param mb_idx The index within the mini-batch.
+   *
+   */
+  virtual bool fetch_data_field(data_field_type data_field, CPUMat& Y, int data_id, int mb_idx)
+  {
+    NOT_IMPLEMENTED("fetch_data_field");
+    return false;
+  }
+
   /**
    * Fetch a single sample into a matrix.
    * @param X The matrix to load data into.
@@ -902,8 +926,6 @@ class generic_data_reader {
   bool m_gan_labelling; //boolean flag of whether its GAN binary label, default is false
   int m_gan_label_value; //zero(0) or 1 label value for discriminator, default is 0
 
-  std::vector<std::vector<char>> m_thread_buffer;
-
   observer_ptr<thread_pool> m_io_thread_pool;
 
   /// special handling for 1B jag; each reader
diff --git a/include/lbann/data_readers/data_reader_HDF5.hpp b/include/lbann/data_readers/data_reader_HDF5.hpp
index d1ec450e832..a3fff29d506 100644
--- a/include/lbann/data_readers/data_reader_HDF5.hpp
+++ b/include/lbann/data_readers/data_reader_HDF5.hpp
@@ -65,27 +65,21 @@ class hdf5_data_reader : public data_reader_sample_list<sample_list_hdf5<std::st
 
   void load() override;
 
-  /** @brief Called by fetch_data, fetch_label, fetch_response
-   *
-   * Note that 'which' is not confined to the three commonly used
-   * in lbann (datum, label, response); in general, it can be
-   * any pack field in the experiment schema: pack: <string>
-   */
-  bool fetch(data_field_type data_field, CPUMat& Y, int data_id, int mb_idx);
+  bool fetch_data_field(data_field_type data_field, CPUMat& Y, int data_id, int mb_idx) override;
 
   bool fetch_datum(CPUMat& X, int data_id, int mb_idx) override
   {
-    return fetch("datum", X, data_id, mb_idx);
+    return fetch_data_field("datum", X, data_id, mb_idx);
   }
 
   bool fetch_response(CPUMat& Y, int data_id, int mb_idx) override
   {
-    return fetch("response", Y, data_id, mb_idx);
+    return fetch_data_field("response", Y, data_id, mb_idx);
   }
 
   bool fetch_label(CPUMat& Y, int data_id, int mb_idx) override
   {
-    return fetch("label", Y, data_id, mb_idx);
+    return fetch_data_field("label", Y, data_id, mb_idx);
   }
 
   /** @brief Sets the name of the yaml experiment file */
@@ -256,7 +250,7 @@ class hdf5_data_reader : public data_reader_sample_list<sample_list_hdf5<std::st
   const std::vector<int> get_data_dims(std::string name = "") const;
 
   /** Returns the size of the requested field (datum, label, response, etc) */
-  int get_linearized_size(std::string const& name) const override;
+  int get_linearized_size(data_field_type const& data_field) const override;
 
   /** P_0 reads and bcasts the schema */
   void load_sample_schema(conduit::Schema& s);
@@ -338,6 +332,7 @@ class hdf5_data_reader : public data_reader_sample_list<sample_list_hdf5<std::st
 
   /** Constructs m_data_dims_lookup_table and m_linearized_size_lookup_table */
   void construct_linearized_size_lookup_tables();
+  void construct_linearized_size_lookup_tables(conduit::Node& node);
 
   /** sanity check; call after adjust_metadata */
   void test_that_all_nodes_contain_metadata(conduit::Node& node);
diff --git a/include/lbann/data_readers/data_reader_synthetic.hpp b/include/lbann/data_readers/data_reader_synthetic.hpp
index caa54fa7cf8..20d7ac40ff0 100644
--- a/include/lbann/data_readers/data_reader_synthetic.hpp
+++ b/include/lbann/data_readers/data_reader_synthetic.hpp
@@ -29,6 +29,7 @@
 #define LBANN_DATA_READER_SYNTHETIC_HPP
 
 #include "data_reader.hpp"
+#include "lbann/utils/dim_helpers.hpp"
 
 // Forward declaration
 class DataReaderSyntheticWhiteboxTester;
@@ -47,6 +48,8 @@ class data_reader_synthetic : public generic_data_reader {
                         int num_labels, bool shuffle = true);
   data_reader_synthetic(int num_samples, std::vector<int> dims,
                         std::vector<int> response_dims, bool shuffle = true);
+  data_reader_synthetic(int num_samples, std::map<data_field_type, std::vector<int>> data_fields,
+                        bool shuffle = true);
   data_reader_synthetic(const data_reader_synthetic&) = default;
   data_reader_synthetic& operator=(const data_reader_synthetic&) = default;
   ~data_reader_synthetic() override {}
@@ -59,6 +62,14 @@ class data_reader_synthetic : public generic_data_reader {
 
   void load() override;
 
+  int get_linearized_size(data_field_type const& data_field) const override {
+    auto iter = m_synthetic_data_fields.find(data_field);
+    if (iter == end(m_synthetic_data_fields)) {
+      LBANN_ERROR("Unknown data field ", data_field);
+    }
+    return get_linear_size(iter->second);
+  }
+
   int get_linearized_data_size() const override {
     return std::accumulate(m_dimensions.begin(), m_dimensions.end(), 1,
                            std::multiplies<int>());
@@ -82,6 +93,7 @@ class data_reader_synthetic : public generic_data_reader {
   }
 
  protected:
+  bool fetch_data_field(data_field_type data_field, CPUMat& Y, int data_id, int mb_idx) override;
   bool fetch_datum(CPUMat& X, int data_id, int mb_idx) override;
   bool fetch_label(CPUMat& Y, int data_id, int mb_idx) override;
   bool fetch_response(CPUMat& Y, int data_id, int mb_idx) override;
@@ -98,6 +110,8 @@ class data_reader_synthetic : public generic_data_reader {
   std::vector<int> m_dimensions;
   /** Shape of the responses. */
   std::vector<int> m_response_dimensions;
+
+  std::map<data_field_type, std::vector<int>> m_synthetic_data_fields;
 };
 
 }  // namespace lbann
diff --git a/src/data_readers/data_reader.cpp b/src/data_readers/data_reader.cpp
index 578b1c740fc..b3f7070324d 100644
--- a/src/data_readers/data_reader.cpp
+++ b/src/data_readers/data_reader.cpp
@@ -80,10 +80,6 @@ void generic_data_reader::setup(int num_io_threads, observer_ptr<thread_pool> io
 
   shuffle_indices();
 
-  m_thread_buffer.resize(num_io_threads, std::vector<char>());
-  for(int tid = 0; tid < num_io_threads; ++tid) {
-    m_thread_buffer[tid].resize(get_linearized_data_size());
-  }
   m_io_thread_pool = io_thread_pool;
 }
 
@@ -270,6 +266,22 @@ bool lbann::generic_data_reader::fetch_data_block(
           LBANN_ERROR("invalid datum (index ", std::to_string(index), ")");
         }
       }
+      else if (has_data_field(data_field)) {
+        if (buf == nullptr || buf->Height() == 0 || buf->Width() == 0) {
+          LBANN_ERROR(
+            "fetch_data_block function called with invalid buffer: h=",
+            buf->Height(),
+            " x ",
+            buf->Width());
+        }
+        valid = fetch_data_field(data_field, *buf, index, s);
+        if (!valid) {
+          LBANN_ERROR("invalid datum (index ", std::to_string(index), ") for field ", data_field);
+        }
+      }
+      else {
+        LBANN_ERROR("Unsupported data_field ", data_field);
+      }
     }
   }
 
diff --git a/src/data_readers/data_reader_HDF5.cpp b/src/data_readers/data_reader_HDF5.cpp
index d541d016b94..41742ebb953 100644
--- a/src/data_readers/data_reader_HDF5.cpp
+++ b/src/data_readers/data_reader_HDF5.cpp
@@ -824,14 +824,17 @@ const std::vector<int> hdf5_data_reader::get_data_dims(std::string name) const
   return iter->second;
 }
 
-int hdf5_data_reader::get_linearized_size(std::string const& name) const
+int hdf5_data_reader::get_linearized_size(data_field_type const& data_field) const
 {
+  if (m_linearized_size_lookup_table.size() == 0) {
+    LBANN_ERROR("get_linearized_size was called with an empty lookup table");
+  }
   std::unordered_map<std::string, int>::const_iterator iter =
-    m_linearized_size_lookup_table.find(name);
+    m_linearized_size_lookup_table.find(data_field);
   if (iter == m_linearized_size_lookup_table.end()) {
-    LBANN_ERROR("get_linearized_data_size was asked for info about an unknown "
-                "field name: ",
-                name,
+    LBANN_ERROR("get_linearized_size was asked for info about an unknown "
+                "data field: ",
+                data_field,
                 "; table size: ",
                 m_linearized_size_lookup_table.size(),
                 " for role: ",
@@ -855,6 +858,11 @@ void hdf5_data_reader::construct_linearized_size_lookup_tables()
   // could be included in the schemas
   load_sample(node, index);
 
+  return construct_linearized_size_lookup_tables(node);
+}
+
+void hdf5_data_reader::construct_linearized_size_lookup_tables(conduit::Node& node)
+{
   std::unordered_map<std::string, conduit::Node*> leaves;
   get_leaves(&node, leaves);
 
@@ -897,7 +905,7 @@ void hdf5_data_reader::construct_linearized_size_lookup_tables()
   }
 }
 
-bool hdf5_data_reader::fetch(data_field_type data_field,
+bool hdf5_data_reader::fetch_data_field(data_field_type data_field,
                              CPUMat& Y,
                              int data_id,
                              int mb_idx)
@@ -1035,7 +1043,7 @@ const void* hdf5_data_reader::get_data(const size_t sample_id_in,
   // get the pathname to the data, and verify it exists in the conduit::Node
   const conduit::Node& node = m_data_store->get_conduit_node(sample_id_in);
   std::ostringstream ss;
-  ss << node.name() << node.child(0).name() + "/" << data_field;
+  ss << node.child(0).name() + "/" << data_field;
   if (!node.has_path(ss.str())) {
     LBANN_ERROR("no path: ", ss.str());
   }
diff --git a/src/data_readers/data_reader_synthetic.cpp b/src/data_readers/data_reader_synthetic.cpp
index 03dcb5cf1d4..38a9671f013 100644
--- a/src/data_readers/data_reader_synthetic.cpp
+++ b/src/data_readers/data_reader_synthetic.cpp
@@ -77,6 +77,28 @@ data_reader_synthetic::data_reader_synthetic(int num_samples,
   set_has_responses(true);
 }
 
+data_reader_synthetic::data_reader_synthetic(int num_samples,
+                                             std::map<data_field_type, std::vector<int>> data_fields,
+                                             bool shuffle)
+  : generic_data_reader(shuffle),
+    m_num_samples(num_samples),
+    m_synthetic_data_fields(std::move(data_fields))
+{
+  for (auto const& [data_field, dims] : m_synthetic_data_fields) {
+    set_has_data_field(data_field, true);
+  }
+}
+
+bool data_reader_synthetic::fetch_data_field(data_field_type data_field, CPUMat& X, int data_id, int mb_idx) {
+  if (m_synthetic_data_fields.find(data_field) == m_synthetic_data_fields.end()) {
+    LBANN_WARNING("Unknown data field ", data_field);
+    return false;
+  }
+  auto X_v = El::View(X, El::ALL, El::IR(mb_idx, mb_idx + 1));
+  fill_matrix(X_v);
+  return true;
+}
+
 bool data_reader_synthetic::fetch_datum(CPUMat& X, int data_id, int mb_idx) {
   auto X_v = El::View(X, El::ALL, El::IR(mb_idx, mb_idx + 1));
   fill_matrix(X_v);
diff --git a/src/data_readers/unit_test/CMakeLists.txt b/src/data_readers/unit_test/CMakeLists.txt
index 4221e14e068..1c5b78cae7d 100644
--- a/src/data_readers/unit_test/CMakeLists.txt
+++ b/src/data_readers/unit_test/CMakeLists.txt
@@ -7,6 +7,7 @@ set_full_path(THIS_DIR_SEQ_CATCH2_TEST_FILES
 set_full_path(THIS_DIR_MPI_CATCH2_TEST_FILES
   data_reader_smiles_fetch_datum_test.cpp
   data_reader_smiles_sample_list_test.cpp
+  data_reader_HDF5_hrrl_public_api.cpp
   data_reader_HDF5_test.cpp
   data_reader_HDF5_sample_list_test.cpp
   data_reader_synthetic_test_public_api.cpp
diff --git a/src/data_readers/unit_test/data_reader_HDF5_hrrl_data_test.cpp b/src/data_readers/unit_test/data_reader_HDF5_hrrl_data_test.cpp
index 41e7609fefb..19a904cb6c1 100644
--- a/src/data_readers/unit_test/data_reader_HDF5_hrrl_data_test.cpp
+++ b/src/data_readers/unit_test/data_reader_HDF5_hrrl_data_test.cpp
@@ -37,34 +37,9 @@
 #include <string.h>
 
 #include "lbann/data_readers/data_reader_HDF5.hpp"
-
-// It feels like we should be able to pack this node, but with the additional
-// level of hierarchy in the sample name, it fails
-const std::string hdf5_hrrl_data_sample =R"FOO(RUN_ID:
-  000000334:
-    Epmax: 15.2486634101312
-    Etot: 0.0426354341969429
-    Image: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
-    N: 64037572840.4818
-    T: 5.34505173275895
-    alpha: 32.6826031770453
-)FOO";
-
-// Use this version of the sample for the packing test
-const std::string hdf5_hrrl_data_sample_id =R"FOO(000000334:
-    Epmax: 15.2486634101312
-    Etot: 0.0426354341969429
-    Image: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
-    N: 64037572840.4818
-    T: 5.34505173275895
-    alpha: 32.6826031770453
-)FOO";
-
-// Here is how the HRRL data expects its sample to be packed for this experiment schema
-const std::string packed_hdf5_hrrl_data_sample_id =R"FOO(000000334:
-    datum: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
-    response: [15.2486634101312, 0.0426354341969429, 64037572840.4818, 5.34505173275895, 32.6826031770453]
-)FOO";
+#include "./test_data/hdf5_hrrl_data_schema.yaml"
+#include "./test_data/hdf5_hrrl_experiment_schema.yaml"
+#include "./test_data/hdf5_hrrl_test_data_and_schema.yaml"
 
 // Use a different schema to create a different packing
 const std::string packed_hdf5_hrrl_data_sample_id_foobar =R"FOO(000000334:
@@ -82,85 +57,6 @@ const std::string packed_hdf5_hrrl_data_sample_id_foobar_permute =R"FOO(00000033
     baz: [32.6826031770453]
 )FOO";
 
-const std::string hdf5_hrrl_data_schema_test = R"AurthurDent(
-# Re, the "ordering" fields: ordering is relative and need not be unique;
-# it specifies, e.g, the order in which a set of scalars
-# would be appended to a vector.
-#
-# metadata values in the below schema can be over-ridden by values in
-# the experiment_schema.yaml
-#
-# For reference: the metadata nodes may contain additional info,
-# e.g, scale and bias for normalization.
-#
-# The intent is that the the schema and metadata values below should
-# be reasonably static, while the experiment_schema species the
-# subset of values to use in an experiment
-#
-#
-Image:
-  metadata:
-    dims: [4,3]
-    channels: 1
-    ordering: 0
-    scale: [1.5259021896696422e-05]
-    bias: [-1.5259021896696422e-05]
-Epmax:
-  metadata:
-    ordering: 10
-    scale: 0.1
-    bias: -1.0
-Etot:
-  metadata:
-    ordering: 20
-    scale: 0.3916485873519399
-    bias: -0.00039973613068075743
-T:
-  metadata:
-    ordering: 50
-    scale: 0.125
-    bias: -0.25
-alpha:
-  metadata:
-    ordering: 60
-    scale: 0.1
-    bias: -2.5
-
-N:
-  metadata:
-    ordering: 40
-    scale: 3.1662826662374707e-13
-    bias: -0.001001267234978943
-Xshift:
-  metadata:
-    ordering: 70
-Yshift:
-  metadata:
-    ordering: 80
-)AurthurDent";
-
-const std::string hdf5_hrrl_experiment_schema_test = R"AurthurDent(
-Image:
-  metadata:
-    pack: "datum"
-    coerce: "float"
-Epmax:
-  metadata:
-    pack: "response"
-Etot:
-  metadata:
-    pack: "response"
-N:
-  metadata:
-    pack: "response"
-T:
-  metadata:
-    pack: "response"
-alpha:
-  metadata:
-    pack: "response"
-)AurthurDent";
-
 const std::string hdf5_hrrl_experiment_schema_test_foobar = R"AurthurDent(
 Image:
   metadata:
@@ -360,7 +256,7 @@ TEST_CASE("hdf5 data reader pack test",
   {
     // Read in the experiment schema and setup the data reader
     conduit::Node& experiment_schema = white_box_tester.get_experiment_schema(*hdf5_dr);
-    experiment_schema.parse(hdf5_hrrl_experiment_schema_test, "yaml");
+    experiment_schema.parse(hdf5_hrrl_experiment_schema, "yaml");
     // experiment_schema.print();
     white_box_tester.parse_schemas(*hdf5_dr);
 
diff --git a/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp b/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp
new file mode 100644
index 00000000000..abe7095ae5a
--- /dev/null
+++ b/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp
@@ -0,0 +1,195 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include <catch2/catch.hpp>
+
+#include "MPITestHelpers.hpp"
+#include "TestHelpers.hpp"
+#include "lbann/proto/proto_common.hpp"
+#include "lbann/utils/threads/thread_pool.hpp"
+#include "lbann/utils/threads/thread_utils.hpp"
+#include <google/protobuf/text_format.h>
+#include <lbann.pb.h>
+
+#include <conduit/conduit.hpp>
+#include <cstdlib>
+#include <errno.h>
+#include <string.h>
+
+#include "lbann/data_readers/data_reader_HDF5.hpp"
+#include "./test_data/hdf5_hrrl_experiment_schema.yaml"
+#include "./test_data/hdf5_hrrl_test_data_and_schema.yaml"
+
+class DataReaderHDF5WhiteboxTester
+{
+public:
+  void normalize(lbann::hdf5_data_reader& x,
+                 conduit::Node& node,
+                 const std::string& path,
+                 const conduit::Node& metadata)
+  { x.normalize(node, path, metadata); }
+  void repack_image(lbann::hdf5_data_reader& x,
+                    conduit::Node& node,
+                    const std::string& path,
+                    const conduit::Node& metadata)
+  { x.repack_image(node, path, metadata); }
+
+  void pack(lbann::hdf5_data_reader& x,
+            conduit::Node& node,
+            size_t index)
+  { x.pack(node, index); }
+
+  void parse_schemas(lbann::hdf5_data_reader& x) {
+    return x.parse_schemas();
+  }
+
+  conduit::Node& get_data_schema(lbann::hdf5_data_reader& x) {
+    return x.m_data_schema;
+  }
+
+  conduit::Node& get_experiment_schema(lbann::hdf5_data_reader& x) {
+    return x.m_experiment_schema;
+  }
+
+  void set_data_schema(lbann::hdf5_data_reader& x,
+                       const conduit::Node& s) {
+    x.set_data_schema(s);
+  }
+
+  void set_experiment_schema(lbann::hdf5_data_reader& x,
+                             const conduit::Node& s) {
+    x.set_experiment_schema(s);
+  }
+
+  bool fetch_data_field(lbann::hdf5_data_reader& dr,
+                        lbann::data_field_type data_field,
+                        lbann::CPUMat& X,
+                        int data_id,
+                        int mb_idx)
+  {
+    return dr.fetch_data_field(data_field, X, data_id, mb_idx);
+  }
+
+  int get_linearized_size(lbann::hdf5_data_reader& dr,
+                          lbann::data_field_type const& data_field)
+  {
+    return dr.get_linearized_size(data_field);
+  }
+  void construct_linearized_size_lookup_tables(lbann::hdf5_data_reader& dr,
+                                              conduit::Node& node)
+  {
+    return dr.construct_linearized_size_lookup_tables(node);
+  }
+
+};
+
+TEST_CASE("hdf5 data reader data field fetch tests",
+          "[data_reader][hdf5][hrrl][data_field]")
+{
+  // initialize stuff (boilerplate)
+  auto& comm = unit_test::utilities::current_world_comm();
+  lbann::init_random(0, 2);
+  lbann::init_data_seq_random(42);
+
+  conduit::Node ref_node;
+  ref_node.parse(hdf5_hrrl_data_sample_id, "yaml");
+
+  lbann::hdf5_data_reader* hdf5_dr = new lbann::hdf5_data_reader();
+  DataReaderHDF5WhiteboxTester white_box_tester;
+
+  // Setup the data schema for this HRRL data set
+  conduit::Node& data_schema = white_box_tester.get_data_schema(*hdf5_dr);
+  data_schema.parse(hdf5_hrrl_data_schema_test, "yaml");
+  conduit::Node& experiment_schema = white_box_tester.get_experiment_schema(*hdf5_dr);
+  experiment_schema.parse(hdf5_hrrl_experiment_schema, "yaml");
+  white_box_tester.parse_schemas(*hdf5_dr);
+  // Manually tell the data reader to extract all of the data fields
+  white_box_tester.construct_linearized_size_lookup_tables(*hdf5_dr, ref_node);
+
+  hdf5_dr->set_rank(0);
+  hdf5_dr->set_comm(&comm);
+
+  El::Int num_samples = 1;
+
+  auto data_store = new lbann::data_store_conduit(hdf5_dr);
+  hdf5_dr->set_data_store(data_store);
+  // Take the sample and place it into the data store
+  int index = 0;
+  auto& ds = hdf5_dr->get_data_store();
+  conduit::Node& ds_node = ds.get_empty_node(index);
+  ds_node.parse(hdf5_hrrl_data_sample_id, "yaml");
+  ds.set_preloaded_conduit_node(index, ds_node);
+
+  // Initialize a per-trainer I/O thread pool
+  auto io_thread_pool = lbann::make_unique<lbann::thread_pool>();
+  io_thread_pool->launch_pinned_threads(1, 1);
+  hdf5_dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
+  hdf5_dr->set_num_parallel_readers(1);
+
+  SECTION("fetch data field")
+  {
+    lbann::CPUMat X;
+    std::vector<std::string> fields = {"Epmax", "Etot", "Image", "N", "T", "alpha"};
+    for (auto& data_field : fields) {
+      X.Resize(white_box_tester.get_linearized_size(*hdf5_dr, data_field), num_samples);
+
+      auto io_rng = lbann::set_io_generators_local_index(0);
+      for (auto j = 0; j < num_samples; j++) {
+        white_box_tester.fetch_data_field(*hdf5_dr, data_field, X, 0, j);
+      }
+
+      const std::string test_pathname("000000334/" + data_field);
+      for (El::Int j = 0; j < num_samples; j++) {
+        // Every element fetched for sample j (column j of X) must match the reference node
+        size_t num_elements = ref_node[test_pathname].dtype().number_of_elements();
+        if(num_elements > 1) {
+          for(size_t i = 0; i < num_elements; i++) {
+            double check = ref_node[test_pathname].as_double_array()[i];
+            CHECK(X(i,j) == Approx(check));
+          }
+        }
+        else {
+          double check = ref_node[test_pathname].as_double();
+          CHECK(X(0,j) == Approx(check));
+        }
+      }
+    }
+  }
+
+  SECTION("fetch invalid data field")
+  {
+    lbann::CPUMat X;
+    std::vector<std::string> fields = {"foo"};
+    for (auto& data_field : fields) {
+      CHECK_THROWS(X.Resize(white_box_tester.get_linearized_size(*hdf5_dr, data_field), num_samples));
+
+      auto io_rng = lbann::set_io_generators_local_index(0);
+      for (auto j = 0; j < num_samples; j++) {
+        CHECK_THROWS(white_box_tester.fetch_data_field(*hdf5_dr, data_field, X, 0, j));
+      }
+    }
+  }
+}
diff --git a/src/data_readers/unit_test/data_reader_synthetic_test.cpp b/src/data_readers/unit_test/data_reader_synthetic_test.cpp
index 7c64b224672..29be513c4ac 100644
--- a/src/data_readers/unit_test/data_reader_synthetic_test.cpp
+++ b/src/data_readers/unit_test/data_reader_synthetic_test.cpp
@@ -66,6 +66,14 @@ class DataReaderSyntheticWhiteboxTester
   {
     return dr.fetch_response(Y, data_id, mb_idx);
   }
+  bool fetch_data_field(lbann::data_reader_synthetic& dr,
+                        lbann::data_field_type data_field,
+                        lbann::CPUMat& X,
+                        int data_id,
+                        int mb_idx)
+  {
+    return dr.fetch_data_field(data_field, X, data_id, mb_idx);
+  }
 };
 
 TEST_CASE("Synthetic data reader classification tests",
@@ -192,3 +200,57 @@ TEST_CASE("Synthetic data reader regression tests",
     }
   }
 }
+
+TEST_CASE("Synthetic data reader data field",
+          "[data_reader][synthetic][data_field]")
+{
+  // initialize stuff (boilerplate)
+  lbann::init_random(42, 1);
+  lbann::init_data_seq_random(42);
+
+  DataReaderSyntheticWhiteboxTester white_box_tester;
+
+  // Create a local copy of the RNG to check the synthetic data reader
+  lbann::fast_rng_gen ref_fast_generator;
+  ref_fast_generator.seed(lbann::hash_combine(42, 0));
+
+  auto s = GENERATE(range(1, 4));
+  El::Int num_samples = s;
+  std::vector<lbann::data_field_type> data_fields = {"foo", "bar"};
+  std::map<lbann::data_field_type, std::vector<int>> fields;
+  int f = 0;
+  for (auto const& data_field : data_fields) {
+    std::vector<int> dims = {s+f, s+f};
+    fields[data_field] = dims;
+    ++f;
+  }
+
+  SECTION("fetch data field")
+  {
+    auto dr = std::make_unique<lbann::data_reader_synthetic>(num_samples,
+                                                             fields,
+                                                             false);
+    lbann::CPUMat X;
+    for (auto const& [data_field, dims] : fields) {
+      X.Resize(dims[0] * dims[1], num_samples);
+
+      auto io_rng = lbann::set_io_generators_local_index(0);
+      for (El::Int j = 0; j < num_samples; j++) {
+        white_box_tester.fetch_data_field(*dr, data_field, X, 0, j);
+      }
+
+      for (El::Int j = 0; j < num_samples; j++) {
+        // Create a new normal distribution for each sample.  This ensures
+        // that the behavior matches the implementation in the synthetic data
+        // reader and handles the case of odd numbers of entries with a normal
+        // distribution implementation. (Specifically that entries for a
+        // normal distribution are generated in pairs.)
+        std::normal_distribution<lbann::DataType> dist(float(0), float(1));
+        for (El::Int i = 0; i < X.Height(); i++) {
+          CHECK(X(i, j) == dist(ref_fast_generator));
+        }
+      }
+    }
+    REQUIRE(white_box_tester.fetch_data_field(*dr, "foobar", X, 0, 0) == false);
+  }
+}
diff --git a/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp b/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
index a080828211b..28d24dfc9db 100644
--- a/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
+++ b/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
@@ -151,11 +151,6 @@ TEST_CASE("Synthetic data reader public API tests",
 
     dr->fetch(local_input_buffers, indices_fetched);
 
-    // for (auto& [field, buf] : local_input_buffers) {
-    //   std::cout << "For field " << field << std::endl;
-    //   El::Print(*buf);
-    // }
-
     // Check all of the results that were fetched.  Ensure that the
     // data fields are accessed in the same order that they are in the map
     for (El::Int j = 0; j < num_samples; j++) {
@@ -176,8 +171,6 @@ TEST_CASE("Synthetic data reader public API tests",
         else if (data_field == INPUT_DATA_TYPE_LABELS) {
           auto& Y = *(local_input_buffers[INPUT_DATA_TYPE_LABELS]);
           auto index = lbann::fast_rand_int(ref_fast_generator, num_labels);
-          // std::cout << "Here is the reference value " << index <<
-          // std::endl;
           for (El::Int i = 0; i < Y.Height(); i++) {
             if (index == i) {
               CHECK(Y(i, j) == 1);
@@ -191,3 +184,198 @@ TEST_CASE("Synthetic data reader public API tests",
     }
   }
 }
+
+TEST_CASE("Synthetic data reader public API tests - arbitrary field",
+          "[mpi][data_reader][synthetic][public][data_field]")
+{
+  // initialize stuff (boilerplate)
+  auto& comm = unit_test::utilities::current_world_comm();
+  int seed = 42;
+  lbann::init_random(seed, 1);
+  lbann::init_data_seq_random(seed);
+
+  // Create a local copy of the RNG to check the synthetic data reader
+  lbann::fast_rng_gen ref_fast_generator;
+  // Mix in the rank in trainer
+  seed = lbann::hash_combine(seed, comm.get_rank_in_trainer());
+  // Mix in the I/O thread rank
+  ref_fast_generator.seed(lbann::hash_combine(seed, 0));
+
+  // Initialize a per-trainer I/O thread pool
+  auto io_thread_pool = lbann::make_unique<lbann::thread_pool>();
+  io_thread_pool->launch_pinned_threads(1, 1);
+
+  //  std::set<std::string> active_data_fields = {"samples"};
+  auto s = GENERATE(range(1, 2));
+  El::Int num_samples = s;
+  std::set<lbann::data_field_type> data_fields = {"foo", "bar"};
+  std::map<lbann::data_field_type, std::vector<int>> fields;
+  int f = 0;
+  std::map<lbann::data_field_type, std::unique_ptr<lbann::CPUMat>>
+    owning_local_input_buffers;
+  std::map<lbann::data_field_type, lbann::CPUMat*> local_input_buffers;
+  for (auto const& data_field : data_fields) {
+    std::vector<int> dims = {s+f, s+f};
+    fields[data_field] = dims;
+    ++f;
+    auto local_mat = std::make_unique<lbann::CPUMat>();
+    auto sample_size =
+      std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int>());
+    local_mat->Resize(sample_size, num_samples);
+    El::Zeros_seq(*local_mat, sample_size, num_samples);
+    local_input_buffers[data_field] = local_mat.get();
+    owning_local_input_buffers[data_field] = std::move(local_mat);
+  }
+  El::Matrix<El::Int> indices_fetched;
+  El::Zeros_seq(indices_fetched, num_samples, 1);
+
+  SECTION("fetch arbitrary data fields")
+  {
+    auto dr = std::make_unique<lbann::data_reader_synthetic>(num_samples,
+                                                             fields,
+                                                             false);
+    dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
+    dr->set_rank(0);
+    dr->set_comm(&comm);
+    dr->set_num_parallel_readers(1);
+    dr->load();
+    dr->set_mini_batch_size(num_samples);
+    dr->set_last_mini_batch_size(num_samples);
+    dr->set_initial_position();
+
+    dr->fetch(local_input_buffers, indices_fetched);
+
+    // Check all of the results that were fetched.  Ensure that the
+    // data fields are accessed in the same order that they are in the map
+    for (El::Int j = 0; j < num_samples; j++) {
+      for (auto const& data_field : data_fields) {
+        auto& X = *(local_input_buffers[data_field]);
+        // Create a new normal distribution for each sample.  This ensures
+        // that the behavior matches the implementation in the synthetic
+        // data reader and handles the case of odd numbers of entries with a
+        // normal distribution implementation. (Specifically that entries
+        // for a normal distribution are generated in pairs.)
+        std::normal_distribution<lbann::DataType> dist(float(0), float(1));
+        for (El::Int i = 0; i < X.Height(); i++) {
+          CHECK(X(i, j) == dist(ref_fast_generator));
+        }
+      }
+    }
+  }
+
+  SECTION("fetch arbitrary bad data field with extra fields")
+  {
+    std::map<lbann::data_field_type, std::vector<int>> test_fields;
+    lbann::data_field_type bad_field = "bar";
+    for (auto const& data_field : data_fields) {
+      if (data_field != bad_field) {
+        test_fields[data_field] = fields[data_field];
+      }
+    }
+    auto dr = std::make_unique<lbann::data_reader_synthetic>(num_samples,
+                                                             test_fields,
+                                                             false);
+    dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
+    dr->set_rank(0);
+    dr->set_comm(&comm);
+    dr->set_num_parallel_readers(1);
+    dr->load();
+    dr->set_mini_batch_size(num_samples);
+    dr->set_last_mini_batch_size(num_samples);
+    dr->set_initial_position();
+
+    CHECK_THROWS(dr->fetch(local_input_buffers, indices_fetched));
+
+    // All data buffers should be empty since it will have thrown an exception
+    for (El::Int j = 0; j < num_samples; j++) {
+      for (auto const& data_field : data_fields) {
+        auto& X = *(local_input_buffers[data_field]);
+        for (El::Int i = 0; i < X.Height(); i++) {
+          CHECK(X(i, j) == 0.0f);
+        }
+      }
+    }
+  }
+
+  SECTION("fetch arbitrary bad data fields - no extra buffers")
+  {
+    std::map<lbann::data_field_type, std::vector<int>> test_fields;
+    std::map<lbann::data_field_type, lbann::CPUMat*> test_local_input_buffers;
+    lbann::data_field_type bad_field = "bar";
+    for (auto const& data_field : data_fields) {
+      if (data_field != bad_field) {
+        test_fields[data_field] = fields[data_field];
+        test_local_input_buffers[data_field] = local_input_buffers[data_field];
+      }
+    }
+    auto dr = std::make_unique<lbann::data_reader_synthetic>(num_samples,
+                                                             test_fields,
+                                                             false);
+    dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
+    dr->set_rank(0);
+    dr->set_comm(&comm);
+    dr->set_num_parallel_readers(1);
+    dr->load();
+    dr->set_mini_batch_size(num_samples);
+    dr->set_last_mini_batch_size(num_samples);
+    dr->set_initial_position();
+
+    dr->fetch(test_local_input_buffers, indices_fetched);
+
+    // Check all of the results that were fetched.  Ensure that the
+    // data fields are accessed in the same order that they are in the map
+    for (El::Int j = 0; j < num_samples; j++) {
+      for (auto const& data_field : data_fields) {
+        auto& X = *(local_input_buffers[data_field]);
+        if (data_field == bad_field) {
+          for (El::Int i = 0; i < X.Height(); i++) {
+            CHECK(X(i, j) == 0.0f);
+          }
+        }
+        else {
+          // Create a new normal distribution for each sample.  This ensures
+          // that the behavior matches the implementation in the synthetic
+          // data reader and handles the case of odd numbers of entries with a
+          // normal distribution implementation. (Specifically that entries
+          // for a normal distribution are generated in pairs.)
+          std::normal_distribution<lbann::DataType> dist(float(0), float(1));
+          for (El::Int i = 0; i < X.Height(); i++) {
+            CHECK(X(i, j) == dist(ref_fast_generator));
+          }
+        }
+      }
+    }
+  }
+
+  SECTION("fetch arbitrary check has data field guard")
+  {
+    auto dr = std::make_unique<lbann::data_reader_synthetic>(num_samples,
+                                                             fields,
+                                                             false);
+    dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
+    dr->set_rank(0);
+    dr->set_comm(&comm);
+    dr->set_num_parallel_readers(1);
+    dr->load();
+    dr->set_mini_batch_size(num_samples);
+    dr->set_last_mini_batch_size(num_samples);
+    dr->set_initial_position();
+
+    for(auto const& data_field : data_fields) {
+      dr->set_has_data_field(data_field, false);
+    }
+
+    CHECK_THROWS(dr->fetch(local_input_buffers, indices_fetched));
+
+    // All data buffers should be empty since it will have thrown an exception
+    for (El::Int j = 0; j < num_samples; j++) {
+      for (auto const& data_field : data_fields) {
+        auto& X = *(local_input_buffers[data_field]);
+        for (El::Int i = 0; i < X.Height(); i++) {
+          CHECK(X(i, j) == 0.0f);
+        }
+      }
+    }
+  }
+
+}
diff --git a/src/data_readers/unit_test/test_data/hdf5_hrrl_test_data_and_schema.yaml b/src/data_readers/unit_test/test_data/hdf5_hrrl_test_data_and_schema.yaml
new file mode 100644
index 00000000000..7a0967b3f92
--- /dev/null
+++ b/src/data_readers/unit_test/test_data/hdf5_hrrl_test_data_and_schema.yaml
@@ -0,0 +1,84 @@
+// It feels like we should be able to pack this node, but with the additional
+// level of hierarchy in the sample name, it fails
+const std::string hdf5_hrrl_data_sample =R"FOO(RUN_ID:
+  000000334:
+    Epmax: 15.2486634101312
+    Etot: 0.0426354341969429
+    Image: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
+    N: 64037572840.4818
+    T: 5.34505173275895
+    alpha: 32.6826031770453
+)FOO";
+
+// Use this version of the sample for the packing test
+const std::string hdf5_hrrl_data_sample_id =R"FOO(000000334:
+    Epmax: 15.2486634101312
+    Etot: 0.0426354341969429
+    Image: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
+    N: 64037572840.4818
+    T: 5.34505173275895
+    alpha: 32.6826031770453
+)FOO";
+
+// Here is how the HRRL data expects its sample to be packed for this experiment schema
+const std::string packed_hdf5_hrrl_data_sample_id =R"FOO(000000334:
+    datum: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
+    response: [15.2486634101312, 0.0426354341969429, 64037572840.4818, 5.34505173275895, 32.6826031770453]
+)FOO";
+
+const std::string hdf5_hrrl_data_schema_test = R"AurthurDent(
+# Re, the "ordering" fields: ordering is relative and need not be unique;
+# it specifies, e.g, the order in which a set of scalars
+# would be appended to a vector.
+#
+# metadata values in the below schema can be over-ridden by values in
+# the experiment_schema.yaml
+#
+# For reference: the metadata nodes may contain additional info,
+# e.g, scale and bias for normalization.
+#
+# The intent is that the the schema and metadata values below should
+# be reasonably static, while the experiment_schema species the
+# subset of values to use in an experiment
+#
+#
+Image:
+  metadata:
+    dims: [4,3]
+    channels: 1
+    ordering: 0
+    scale: [1.5259021896696422e-05]
+    bias: [-1.5259021896696422e-05]
+Epmax:
+  metadata:
+    ordering: 10
+    scale: 0.1
+    bias: -1.0
+Etot:
+  metadata:
+    ordering: 20
+    scale: 0.3916485873519399
+    bias: -0.00039973613068075743
+T:
+  metadata:
+    ordering: 50
+    scale: 0.125
+    bias: -0.25
+alpha:
+  metadata:
+    ordering: 60
+    scale: 0.1
+    bias: -2.5
+
+N:
+  metadata:
+    ordering: 40
+    scale: 3.1662826662374707e-13
+    bias: -0.001001267234978943
+Xshift:
+  metadata:
+    ordering: 70
+Yshift:
+  metadata:
+    ordering: 80
+)AurthurDent";
diff --git a/src/data_store/data_store_conduit.cpp b/src/data_store/data_store_conduit.cpp
index b8c4ed237de..c71358d180b 100644
--- a/src/data_store/data_store_conduit.cpp
+++ b/src/data_store/data_store_conduit.cpp
@@ -416,19 +416,20 @@ void data_store_conduit::set_conduit_node(int data_id, const conduit::Node &node
 }
 
 const conduit::Node & data_store_conduit::get_conduit_node(int data_id) const {
+  using iterator_t = std::unordered_map<int, conduit::Node>::const_iterator;
   if (is_local_cache()) {
-    std::unordered_map<int, conduit::Node>::const_iterator t3 = m_data.find(data_id);
+    iterator_t t3 = m_data.find(data_id);
     if (t3 == m_data.end()) {
       LBANN_ERROR("(local cache) failed to find data_id: ", data_id, " in m_data; m_data.size: ", m_data.size());
     }
     return t3->second;
   }
 
-  std::unordered_map<int, conduit::Node>::const_iterator t2 = m_minibatch_data.find(data_id);
+  iterator_t t2 = m_minibatch_data.find(data_id);
   // if not preloaded, and get_label() or get_response() is called,
   // we need to check m_data
   if (t2 == m_minibatch_data.end()) {
-    std::unordered_map<int, conduit::Node>::const_iterator t3 = m_data.find(data_id);
+    iterator_t t3 = m_data.find(data_id);
     if (t3 != m_data.end()) {
       return t3->second["data"];
     }

From ae29b8649ccb11e4047d5342175e2722b753963a Mon Sep 17 00:00:00 2001
From: Tom Benson <30674819+benson31@users.noreply.github.com>
Date: Thu, 30 Sep 2021 11:40:57 -0400
Subject: [PATCH 13/37] Fix data type issue with operators at the PFE level.
 (#1978)

* Fix data type issue with operators at the PFE level.
* Fix an issue when the data type hasn't been set at all in the layer.
---
 python/lbann/core/operator_layers.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/python/lbann/core/operator_layers.py b/python/lbann/core/operator_layers.py
index cfb56f2d0be..0368c4481ec 100644
--- a/python/lbann/core/operator_layers.py
+++ b/python/lbann/core/operator_layers.py
@@ -39,9 +39,18 @@ def __init__(self, *args, **kwargs):
         layer_kwargs['ops'] = [ operator_class(**op_kwargs) ]
         OperatorLayer.__init__(self, *args, **layer_kwargs)
 
+    def export_proto(self):
+        """Construct and return a protobuf message."""
+        if (self.datatype is None):
+            self.datatype = 0 # Use the default value.
+        for o in self.ops:
+            o.input_type = self.datatype
+            o.output_type = self.datatype
+        return OperatorLayer.export_proto(self)
+
     # Return operator layer class
     class_name = operator_class.__name__
-    class_dict = {'__init__': __init__}
+    class_dict = {'__init__': __init__, 'export_proto': export_proto}
     return type(class_name, (OperatorLayer,), class_dict)
 
 def is_operator_class(obj):

From 32b641a603c567999ace24a2ea56180f4e995285 Mon Sep 17 00:00:00 2001
From: Tom Benson <30674819+benson31@users.noreply.github.com>
Date: Mon, 4 Oct 2021 13:08:31 -0400
Subject: [PATCH 14/37] Make the learning rate in the optimizer not a dependent
 type. (#1979)

* Make the learning rate a double

* Move code from data_type_optimizer.cpp to data_type_optimizer_impl.hpp; clang-format

* fix a few issues related to data type optimizers

* Fix a potential segfault-that-should-be-an-exception in the model

* fix an issue with channelwise FC when using mixed data types

* Fix some issues related to half_float::half
---
 include/lbann/layers/data_type_layer.hpp      |   1 -
 .../lbann/optimizers/data_type_optimizer.hpp  |  28 ++-
 .../optimizers/data_type_optimizer_impl.hpp   | 166 +++++++++++++++++-
 include/lbann/optimizers/optimizer.hpp        |   3 +
 src/callbacks/learning_rate.cpp               |   9 +-
 src/callbacks/perturb_adam.cpp                |   2 +-
 src/callbacks/perturb_learning_rate.cpp       |   6 +-
 src/callbacks/variable_minibatch.cpp          |  14 +-
 .../learning/channelwise_fully_connected.cpp  |  75 ++++----
 src/models/model.cpp                          |  22 +--
 src/optimizers/adagrad.cpp                    |   2 +-
 src/optimizers/adam.cpp                       |   6 +-
 src/optimizers/data_type_optimizer.cpp        | 151 +---------------
 src/optimizers/hypergradient_adam.cpp         |   4 +-
 src/optimizers/rmsprop.cpp                    |   2 +-
 src/optimizers/sgd.cpp                        |   2 +-
 16 files changed, 247 insertions(+), 246 deletions(-)

diff --git a/include/lbann/layers/data_type_layer.hpp b/include/lbann/layers/data_type_layer.hpp
index 1db008edbc3..95f3f16c7c7 100644
--- a/include/lbann/layers/data_type_layer.hpp
+++ b/include/lbann/layers/data_type_layer.hpp
@@ -289,7 +289,6 @@ class data_type_layer : public Layer {
   void setup_inter_subgrid_comm_based_on_childs(const El::Grid& grid);
   void setup_inter_subgrid_comm_based_on_parents(const El::Grid& grid);
 
-
 private:
 
   /** @brief Attempt to take ownership of the previous error signal.
diff --git a/include/lbann/optimizers/data_type_optimizer.hpp b/include/lbann/optimizers/data_type_optimizer.hpp
index adeaedae1d3..0935a190145 100644
--- a/include/lbann/optimizers/data_type_optimizer.hpp
+++ b/include/lbann/optimizers/data_type_optimizer.hpp
@@ -30,10 +30,9 @@
 #include "lbann/optimizers/optimizer.hpp"
 
 // Forward declarations
-namespace cereal
-{
-  class access;
-}// namespace cereal
+namespace cereal {
+class access;
+} // namespace cereal
 
 namespace lbann {
 
@@ -43,9 +42,9 @@ class data_type_weights;
 
 template <typename TensorDataType>
 class data_type_optimizer
-  : public Cloneable<
-             HasAbstractFunction<data_type_optimizer<TensorDataType>>,
-             optimizer> {
+  : public Cloneable<HasAbstractFunction<data_type_optimizer<TensorDataType>>,
+                     optimizer>
+{
 
   using BaseType =
     Cloneable<HasAbstractFunction<data_type_optimizer<TensorDataType>>,
@@ -105,20 +104,19 @@ class data_type_optimizer
   ///@}
 
   /** @brief Access the scaling factor for optimization step sizes. */
-  TensorDataType get_learning_rate() const;
+  double get_learning_rate() const final;
   /** @brief Set the scaling factor for optimization step sizes. */
-  void set_learning_rate(TensorDataType learning_rate);
+  void set_learning_rate(double learning_rate) override;
 
   /** @name Checkpointing functionality */
   ///@{
   /** @brief Archive for checkpoint and restart */
   template <class Archive>
-  void serialize(Archive & ar);
+  void serialize(Archive& ar);
 
   ///@}
 
 protected:
-
   data_type_optimizer(const data_type_optimizer& other);
   data_type_optimizer& operator=(const data_type_optimizer& other);
 
@@ -133,10 +131,9 @@ class data_type_optimizer
   /** @brief Get the info needed to construct a new gradient matrix.
    *  @return Tuple of height, width, and DistData.
    */
-  std::tuple<El::Int,El::Int,El::DistData> get_matrix_info() const final;
+  std::tuple<El::Int, El::Int, El::DistData> get_matrix_info() const final;
 
 private:
-
   /** @brief Weights being optimized. */
   data_type_weights<TensorDataType>* m_weights = nullptr;
 
@@ -166,12 +163,11 @@ class data_type_optimizer
    *
    *  @todo Consider moving this to the derived classes.
    */
-  TensorDataType m_learning_rate;
+  double m_learning_rate;
 };
 
 #ifndef LBANN_DATA_TYPE_OPTIMIZER_INSTANTIATE
-#define PROTO(T)                                \
-  extern template class data_type_optimizer<T>
+#define PROTO(T) extern template class data_type_optimizer<T>
 
 #define LBANN_INSTANTIATE_CPU_HALF
 #define LBANN_INSTANTIATE_GPU_HALF
diff --git a/include/lbann/optimizers/data_type_optimizer_impl.hpp b/include/lbann/optimizers/data_type_optimizer_impl.hpp
index 9c310a33b4d..0ed208a4b22 100644
--- a/include/lbann/optimizers/data_type_optimizer_impl.hpp
+++ b/include/lbann/optimizers/data_type_optimizer_impl.hpp
@@ -27,18 +27,174 @@
 #ifndef LBANN_OPTIMIZERS_DATA_TYPE_OPTIMIZER_IMPL_HPP_INCLUDED
 #define LBANN_OPTIMIZERS_DATA_TYPE_OPTIMIZER_IMPL_HPP_INCLUDED
 
+#include "lbann/weights/data_type_weights.hpp"
 #include "lbann/utils/serialize.hpp"
+#include "lbann/utils/timer.hpp"
+
 #include "lbann/optimizers/data_type_optimizer.hpp"
 
 namespace lbann {
 
+template <typename TensorDataType>
+data_type_optimizer<TensorDataType>::data_type_optimizer(
+  TensorDataType learning_rate)
+  : m_learning_rate(learning_rate)
+{}
+
+template <typename TensorDataType>
+data_type_optimizer<TensorDataType>::data_type_optimizer(
+  const data_type_optimizer<TensorDataType>& other)
+  : BaseType(other),
+    m_weights(other.m_weights),
+    m_gradient(other.m_gradient ? other.m_gradient->Copy() : nullptr),
+    m_gradient_v(other.m_gradient_v ? other.m_gradient_v->Copy() : nullptr),
+    m_learning_rate(other.m_learning_rate)
+{}
+
+template <typename TensorDataType>
+data_type_optimizer<TensorDataType>&
+data_type_optimizer<TensorDataType>::operator=(
+  const data_type_optimizer<TensorDataType>& other)
+{
+  optimizer::operator=(other);
+  m_weights = other.m_weights;
+  m_gradient.reset(other.m_gradient ? other.m_gradient->Copy() : nullptr);
+  m_gradient_v.reset(other.m_gradient_v ? other.m_gradient_v->Copy() : nullptr);
+  m_learning_rate = other.m_learning_rate;
+  return *this;
+}
+
+template <typename TensorDataType>
+description data_type_optimizer<TensorDataType>::get_description() const
+{
+  description desc = optimizer::get_description();
+  desc.add("Learning rate", m_learning_rate);
+  return desc;
+}
+
+template <typename TensorDataType>
+auto data_type_optimizer<TensorDataType>::get_weights() -> WeightsType&
+{
+  // Item 3, p. 23 in "Effective C++", 3rd ed., by Scott Meyers
+  return const_cast<WeightsType&>(
+    static_cast<const data_type_optimizer&>(*this).get_weights());
+}
+
+template <typename TensorDataType>
+auto data_type_optimizer<TensorDataType>::get_weights() const
+  -> const WeightsType&
+{
+  if (m_weights == nullptr) {
+    LBANN_ERROR("attempted to access the weights being optimized "
+                "before they are set");
+  }
+  return *m_weights;
+}
+
+template <typename TensorDataType>
+auto data_type_optimizer<TensorDataType>::get_gradient() -> AbsDistMatrixType&
+{
+
+  // Make sure gradient matrix has been setup
+  if (m_gradient == nullptr) {
+    LBANN_ERROR("attempted to access gradient before it is set up");
+  }
+
+  // Make sure gradient values are ready
+  this->start_gradient_allreduce();
+  this->finish_gradient_allreduce();
+
+  // Gather all gradients to the master precision
+  this->accumulate_all_gradient_contributions(*m_gradient);
+
+  // Return gradient
+  return *m_gradient;
+}
+
+template <typename TensorDataType>
+void data_type_optimizer<TensorDataType>::setup(weights* w_in)
+{
+  if (auto* w = dynamic_cast<WeightsType*>(w_in))
+    this->setup(w);
+  else
+    LBANN_ERROR("Incompatible weights type.");
+}
+
+template <typename TensorDataType>
+void data_type_optimizer<TensorDataType>::setup(WeightsType* w)
+{
+  this->setup_base(w);
+}
+
+template <typename TensorDataType>
+void data_type_optimizer<TensorDataType>::setup_base(WeightsType* w)
+{
+  this->set_comm(w->get_comm());
+  this->clear_gradient();
+
+  // Set weights being optimized
+  if (w != nullptr) {
+    set_weights(w);
+  }
+  if (m_weights == nullptr) {
+    LBANN_ERROR("attempted to setup optimizer without weights");
+  }
+
+  // Initialize matrices
+  const auto& height = m_weights->get_matrix_height();
+  const auto& width = m_weights->get_matrix_width();
+  const AbsDistMatrixType& values = m_weights->get_values();
+  m_gradient.reset(AbsDistMatrixType::Instantiate(values.DistData()));
+  m_gradient->AlignWith(values);
+  m_gradient->Resize(height, width);
+  m_gradient_v.reset(AbsDistMatrixType::Instantiate(values.DistData()));
+  m_gradient_v->AlignWith(values);
+#ifdef HYDROGEN_HAVE_CUB
+  if (m_gradient_v->GetLocalDevice() == El::Device::GPU) {
+    m_gradient_v->Matrix().SetMemoryMode(1); // CUB GPU memory pool
+  }
+#endif // HYDROGEN_HAVE_CUB
+}
+
+template <typename TensorDataType>
+double data_type_optimizer<TensorDataType>::get_learning_rate() const
+{
+  return m_learning_rate;
+}
+
+template <typename TensorDataType>
+void data_type_optimizer<TensorDataType>::set_learning_rate(
+  double learning_rate)
+{
+  m_learning_rate = learning_rate;
+}
+
+template <typename TensorDataType>
+void data_type_optimizer<TensorDataType>::step()
+{
+  if (m_weights == nullptr) {
+    LBANN_ERROR("attempted to perform optimization step without weights");
+  }
+  const auto start_time = get_time();
+  this->step_compute(m_weights->get_values(), this->get_gradient());
+  this->inc_step_time(get_time() - start_time);
+}
+
+template <typename TensorDataType>
+std::tuple<El::Int, El::Int, El::DistData>
+data_type_optimizer<TensorDataType>::get_matrix_info() const
+{
+  auto const& w = this->get_weights();
+  return {w.get_matrix_height(),
+          w.get_matrix_width(),
+          w.get_matrix_distribution()};
+}
+
 template <typename TensorDataType>
 template <class Archive>
-void
-data_type_optimizer<TensorDataType>
-::serialize(Archive & ar) {
-  ar(cereal::base_class<optimizer>(this),
-     CEREAL_NVP(m_learning_rate));
+void data_type_optimizer<TensorDataType>::serialize(Archive& ar)
+{
+  ar(cereal::base_class<optimizer>(this), CEREAL_NVP(m_learning_rate));
 }
 
 } // namespace lbann
diff --git a/include/lbann/optimizers/optimizer.hpp b/include/lbann/optimizers/optimizer.hpp
index 066769b737e..3418db63bfb 100644
--- a/include/lbann/optimizers/optimizer.hpp
+++ b/include/lbann/optimizers/optimizer.hpp
@@ -92,6 +92,9 @@ class optimizer : public Cloneable<HasAbstractFunction<optimizer>> {
   /** @brief Human-readable description. */
   virtual description get_description() const;
 
+  virtual double get_learning_rate() const = 0;
+  virtual void set_learning_rate(double) = 0;
+
   /** @name Gradient update management */
   ///@{
 
diff --git a/src/callbacks/learning_rate.cpp b/src/callbacks/learning_rate.cpp
index af83631e823..9a795d49ef8 100644
--- a/src/callbacks/learning_rate.cpp
+++ b/src/callbacks/learning_rate.cpp
@@ -71,8 +71,7 @@ void learning_rate::setup(model *m) {
       m_weights.insert(w);
       // Initialize the global learning rate, exactly once.
       if (m_cur_global_lr == 0.0f) {
-        m_cur_global_lr =
-          dynamic_cast<data_type_optimizer<DataType>*>(w->get_optimizer())->get_learning_rate();
+        m_cur_global_lr = w->get_optimizer()->get_learning_rate();
       }
     }
   }
@@ -91,7 +90,7 @@ void learning_rate::on_epoch_end(model *m) {
               << " at epoch " << c.get_epoch() << std::endl;
   }
   for (weights* w : this->get_weights()) {
-    auto *opt = dynamic_cast<data_type_optimizer<DataType>*>(w->get_optimizer());
+    auto *opt = w->get_optimizer();
     const float old_lr = opt->get_learning_rate();
     if (old_lr != new_lr) {
       opt->set_learning_rate(new_lr);
@@ -101,7 +100,7 @@ void learning_rate::on_epoch_end(model *m) {
 
 void learning_rate::on_backward_prop_end(model *m) {
   for (weights *w : this->get_weights()) {
-    auto &opt = dynamic_cast<data_type_optimizer<DataType>&>(*w->get_optimizer());
+    auto &opt = *w->get_optimizer();
     const float old_lr = opt.get_learning_rate();
     const float new_lr = optimizer_schedule(m, opt);
     if (old_lr != new_lr) {
@@ -111,7 +110,7 @@ void learning_rate::on_backward_prop_end(model *m) {
 }
 
 float learning_rate::optimizer_schedule(model *m, optimizer &opt) {
-  return dynamic_cast<data_type_optimizer<DataType>&>(opt).get_learning_rate();
+  return opt.get_learning_rate();
 }
 
 step_learning_rate::step_learning_rate(
diff --git a/src/callbacks/perturb_adam.cpp b/src/callbacks/perturb_adam.cpp
index 0a727290bf4..f738ae843b3 100644
--- a/src/callbacks/perturb_adam.cpp
+++ b/src/callbacks/perturb_adam.cpp
@@ -135,7 +135,7 @@ void perturb_adam::perturb(lbann_comm& comm, adam<DataType>& opt) const {
     std::normal_distribution<DataType> dist(zero, one);
 
     // Perturb log(learning_rate)
-    auto learning_rate = opt.get_learning_rate();
+    DataType learning_rate = opt.get_learning_rate();
     if (m_learning_rate_factor != zero && learning_rate >= zero) {
       auto log_val = std::log(std::max(learning_rate, min_val));
       log_val += m_learning_rate_factor * dist(gen);
diff --git a/src/callbacks/perturb_learning_rate.cpp b/src/callbacks/perturb_learning_rate.cpp
index 6ea599985fd..3901caea179 100644
--- a/src/callbacks/perturb_learning_rate.cpp
+++ b/src/callbacks/perturb_learning_rate.cpp
@@ -71,7 +71,7 @@ void perturb_learning_rate::setup(model* m) {
 
 void perturb_learning_rate::on_batch_begin(model* m) {
   const auto& c = m->get_execution_context();
-  if (m_perturb_during_training && 
+  if (m_perturb_during_training &&
       c.get_step() % m_batch_interval == 0 &&
       c.get_step() > 0) {
     perturb(*m);
@@ -124,7 +124,7 @@ void perturb_learning_rate::perturb(lbann_comm& comm, data_type_optimizer<DataTy
     std::normal_distribution<DataType> dist(zero, one);
 
     // Perturb log(learning_rate)
-    auto learning_rate = opt.get_learning_rate();
+    DataType learning_rate = opt.get_learning_rate();
     if (m_learning_rate_factor != zero && learning_rate >= zero) {
       auto log_val = std::log(std::max(learning_rate, min_val));
       log_val += m_learning_rate_factor * dist(gen);
@@ -137,7 +137,7 @@ void perturb_learning_rate::perturb(lbann_comm& comm, data_type_optimizer<DataTy
   // Communicate new lr  from trainer master processes
   comm.trainer_broadcast(comm.get_trainer_master(),new_lr);
 
-  // Workers update new lr 
+  // Workers update new lr
   opt.set_learning_rate(new_lr);
 
 }
diff --git a/src/callbacks/variable_minibatch.cpp b/src/callbacks/variable_minibatch.cpp
index 2847a770f5f..dcf8f628d4d 100644
--- a/src/callbacks/variable_minibatch.cpp
+++ b/src/callbacks/variable_minibatch.cpp
@@ -136,10 +136,9 @@ void variable_minibatch::on_epoch_end(model *m) {
 void variable_minibatch::change_learning_rate(
   model *m, float new_lr) const {
   for (weights *w : m->get_weights()) {
-    optimizer *opt = w->get_optimizer();
-    if (opt != nullptr) {
-      auto* dt_opt = dynamic_cast<data_type_optimizer<DataType>*>(opt);
-      dt_opt->set_learning_rate(new_lr);
+    if (optimizer *opt = w->get_optimizer()) {
+      auto& dt_opt = dynamic_cast<data_type_optimizer<DataType>&>(*opt);
+      dt_opt.set_learning_rate(new_lr);
     }
   }
 }
@@ -147,10 +146,9 @@ void variable_minibatch::change_learning_rate(
 float variable_minibatch::get_current_learning_rate(
   model *m) const {
   for (weights *w : m->get_weights()) {
-    optimizer *opt = w->get_optimizer();
-    if (opt != nullptr) {
-      auto* dt_opt = dynamic_cast<data_type_optimizer<DataType>*>(opt);
-      return dt_opt->get_learning_rate();
+    if (optimizer *opt = w->get_optimizer()) {
+      auto& dt_opt = dynamic_cast<data_type_optimizer<DataType> const&>(*opt);
+      return dt_opt.get_learning_rate();
     }
   }
   return 0.0f;
diff --git a/src/layers/learning/channelwise_fully_connected.cpp b/src/layers/learning/channelwise_fully_connected.cpp
index 1146b563b15..5119ac80e7e 100644
--- a/src/layers/learning/channelwise_fully_connected.cpp
+++ b/src/layers/learning/channelwise_fully_connected.cpp
@@ -41,7 +41,7 @@ namespace lbann
 #ifdef LBANN_HAS_DISTCONV
 
 template <typename TensorDataType, data_layout Layout, El::Device Device>
-void 
+void
 channelwise_fully_connected_distconv_adapter<TensorDataType, Layout, Device>
 ::setup_distributions(tensor_overlap_constraints &constraints){
 
@@ -70,7 +70,7 @@ ::setup_distributions(tensor_overlap_constraints &constraints){
 }
 
 template <typename TensorDataType, data_layout Layout, El::Device Device>
-void 
+void
 channelwise_fully_connected_distconv_adapter<TensorDataType, Layout, Device>
 ::setup_layer(size_t workspace_capacity){
   data_type_distconv_adapter<TensorDataType>::setup_layer(workspace_capacity);
@@ -80,11 +80,11 @@ ::setup_layer(size_t workspace_capacity){
 }
 
 template <typename TensorDataType, data_layout Layout, El::Device Device>
-void 
+void
 channelwise_fully_connected_distconv_adapter<TensorDataType, Layout, Device>
 ::setup_fp_tensors(){
   data_type_distconv_adapter<TensorDataType>::setup_fp_tensors();
-  
+
   // dc::MPIRootPrintStreamInfo() << "STARTING SETTING UP FP TENSORS " << std::endl;
 
   auto &layer = dynamic_cast<
@@ -100,19 +100,19 @@ ::setup_fp_tensors(){
   // Create distribution from distconv
   auto shared_dist = dc::Dist::make_shared_distribution(
     input_dist.get_locale_shape());
-  // Create LocaleMPI via distconv 
+  // Create LocaleMPI via distconv
 
   const dc::LocaleMPI loc(dc::get_mpi_comm(), false);
 
-  // Create new distconv tensor using distribution 
+  // Create new distconv tensor using distribution
 
   m_linear = make_unique<TensorDevType>(linearity_shape, loc, shared_dist);
 
-  // This distconv tensor m_linear will be Viewed during forward compute 
+  // This distconv tensor m_linear will be Viewed during forward compute
 
   // Apply bias
   if(layer.m_has_bias){
-    // get bias shape 
+    // get bias shape
     const auto& bias_dims = layer.get_bias_dims();
     dc::Shape bias_shape(bias_dims);
     m_bias = make_unique<TensorDevType>(bias_shape, loc, shared_dist);
@@ -120,7 +120,7 @@ ::setup_fp_tensors(){
 }
 
 template <typename TensorDataType, data_layout Layout, El::Device Device>
-void 
+void
 channelwise_fully_connected_distconv_adapter<TensorDataType, Layout, Device>
 ::setup_bp_tensors(){
   data_type_distconv_adapter<TensorDataType>::setup_bp_tensors();
@@ -128,7 +128,7 @@ ::setup_bp_tensors(){
   auto &layer = dynamic_cast<
     channelwise_fully_connected_layer<TensorDataType, Layout, Device>&>(this->layer());
 
-  //  Setup backward pass tensors here 
+  //  Setup backward pass tensors here
 
   // create LocaleMPI from distconv
   const dc::LocaleMPI loc(dc::get_mpi_comm(), false);
@@ -147,11 +147,11 @@ ::setup_bp_tensors(){
   assert0(dc::tensor::View(*m_linearity_gradient,
                            linearity_optimizer->get_gradient().Buffer()));
   if(layer.m_has_bias){
-    // Get bias optimizer 
+    // Get bias optimizer
     auto *bias_optimizer = static_cast<data_type_optimizer<TensorDataType>*>(layer.get_weights(1).get_optimizer());
 
     if(bias_optimizer != nullptr){
-      // create shape for bias grad 
+      // create shape for bias grad
       const auto& bias_dims = layer.get_bias_dims();
       dc::Shape bias_shape(bias_dims );
       m_bias_gradient = make_unique<TensorDevType>(bias_shape, loc, shared_dist);
@@ -165,17 +165,17 @@ ::setup_bp_tensors(){
 }
 
 template <typename TensorDataType, data_layout Layout, El::Device Device>
-void 
+void
 channelwise_fully_connected_distconv_adapter<TensorDataType, Layout, Device>
 ::fp_compute(){
 
   auto &layer = dynamic_cast<
     channelwise_fully_connected_layer<TensorDataType, Layout, Device>&>(this->layer());
-  
+
   const auto& linearity = layer.weights_values(0);
 
-  // TO DO: Check if input and output tensors are contiguous 
-  
+  // TO DO: Check if input and output tensors are contiguous
+
   assert0(dc::tensor::View(*m_linear,linearity.LockedBuffer()));
 
   m_linear_operator->forward(layer.m_transpose,
@@ -193,7 +193,7 @@ ::fp_compute(){
 }
 
 template <typename TensorDataType, data_layout Layout, El::Device Device>
-void 
+void
 channelwise_fully_connected_distconv_adapter<TensorDataType, Layout, Device>
 ::bp_compute(){
   auto &layer = dynamic_cast<channelwise_fully_connected_layer
@@ -218,7 +218,7 @@ ::bp_compute(){
   if (linearity_optimizer == nullptr){
     dc::MPIRootPrintStreamInfo() << "Weights optimizer null. Exiting ...." << std::endl;
     return;
-  } 
+  }
   auto& linearity_gradient = linearity_optimizer->get_gradient_buffer(
       dst_scale, gradient_scale, true);
 
@@ -233,7 +233,7 @@ ::bp_compute(){
   if(layer.m_has_bias){
     auto* bias_optimizer = static_cast<data_type_optimizer<TensorDataType>*>(layer.get_weights(1).get_optimizer());
     if (bias_optimizer == nullptr) return;
-    
+
     auto& bias_gradient = bias_optimizer->get_gradient_buffer(
         dst_scale, gradient_scale, true);
 
@@ -252,11 +252,11 @@ dc::Shape
 channelwise_fully_connected_distconv_adapter<TensorDataType, Layout, Device>
 ::get_activations_local_shape(int index) const{
 
-  // The default case assumes that the local is shape is the same as 
-  // the local shape of the first previous activations 
+  // The default case assumes that the local shape is the same as
+  // the local shape of the first previous activations
 
-  // Need to update such that the height and width dimensions match 
-  // match the output dimensions expected  
+  // Need to update such that the height and width dimensions match
+  // the output dimensions expected
 
   const auto &layer = dynamic_cast<const channelwise_fully_connected_layer
     <TensorDataType, Layout, Device>&>(this->layer());
@@ -264,7 +264,7 @@ ::get_activations_local_shape(int index) const{
   auto linearity_dims = layer.get_linearity_dims();
 
   std::reverse(std::begin(linearity_dims), std::end(linearity_dims));
-  const auto output_shape = 
+  const auto output_shape =
     ::distconv::get_fc_output_local_tensor_shape(
       this->get_prev_activations(), linearity_dims, layer.m_transpose);
   return output_shape;
@@ -276,7 +276,7 @@ ::get_activations_local_shape(int index) const{
 
 
 template <typename TensorDataType, data_layout Layout, El::Device Device>
-bool 
+bool
 channelwise_fully_connected_layer<TensorDataType, Layout, Device>
 ::is_distconv_supported() const {
   return Device==El::Device::GPU && Layout == data_layout::DATA_PARALLEL;
@@ -395,21 +395,21 @@ ::get_linearity_dims() const
 {
   const auto& input_dims = this->get_input_dims();
   const auto& output_dims = this->get_output_dims();
-  
+
   const std::vector<size_t> input_channel_dims(
     input_dims.begin()+1, input_dims.end());
-  
+
   const std::vector<size_t> output_channel_dims(
     output_dims.begin()+1, output_dims.end());
-  
+
   const auto& input_channel_size = std::accumulate(
     input_channel_dims.begin(), input_channel_dims.end(),
     1, std::multiplies<size_t>());
-  
+
   const auto& output_channel_size = std::accumulate(
     output_channel_dims.begin(), output_channel_dims.end(),
     1, std::multiplies<size_t>());
-  
+
   const auto linearity_dim_rows = this->m_transpose ? output_channel_size : input_channel_size;
   const auto linearity_dims_cols = this->m_transpose ? input_channel_size : output_channel_size;
   std::vector<int> linearity_dims{1, 1, linearity_dim_rows, linearity_dims_cols};
@@ -422,7 +422,7 @@ channelwise_fully_connected_layer<TensorDataType,Layout,Device>
 ::get_bias_dims() const
 {
   const auto& output_dims = this->get_output_dims();
-  
+
   const std::vector<size_t> output_channel_dims(
     output_dims.begin()+1, output_dims.end());
 
@@ -529,10 +529,9 @@ ::setup_data(size_t max_mini_batch_size)
     auto dist = this->get_prev_activations().DistData();
     dist.colDist = El::STAR;
     dist.rowDist = El::STAR;
-    auto* cast_initializer = dynamic_cast<variance_scaling_initializer<TensorDataType>*>(linearity_weights.get_initializer());
-    if (cast_initializer != nullptr) {
-      cast_initializer->set_fan_in(input_channel_size);
-      cast_initializer->set_fan_out(output_channel_size);
+    if (auto* initializer = linearity_weights.get_initializer()) {
+      set_fan_in(*initializer, input_channel_size);
+      set_fan_out(*initializer, output_channel_size);
     }
     linearity_weights.set_dims(
       m_transpose ? input_channel_dims : output_channel_dims,
@@ -578,11 +577,11 @@ ::fp_compute()
 {
 
 #ifdef LBANN_HAS_DISTCONV
-  // We are guaranteed to have 
+  // We are guaranteed to have
   if(this->distconv_enabled()){
     this->get_distconv_adapter().fp_compute();
     return ;
-  } 
+  }
 
 #endif // LBANN_HAS_DISTCONV
 
@@ -847,7 +846,7 @@ std::unique_ptr<Layer> build_channelwise_fully_connected_layer_from_pbuf(
 
 #define PROTO_DEVICE(T, Device)            \
   template class channelwise_fully_connected_distconv_adapter<         \
-    T,data_layout::DATA_PARALLEL, Device>                             
+    T,data_layout::DATA_PARALLEL, Device>
 #include "lbann/macros/instantiate_device.hpp"
 #undef PROTO_DEVICE
 
diff --git a/src/models/model.cpp b/src/models/model.cpp
index e6b5b17a508..6cac99ee59f 100644
--- a/src/models/model.cpp
+++ b/src/models/model.cpp
@@ -491,17 +491,17 @@ void model::copy_trained_weights_from(std::vector<weights*>& new_weights) {
     return;
   }
   for(size_t i = 0; i < new_weights.size(); ++i) {
-     for (size_t j = 0; j < m_weights.size(); ++j) {
-       //copy only trained weights (that is unfrozen layer)
-       if(m_weights[j]->get_name() == new_weights[i]->get_name() && !new_weights[i]->is_frozen()) {
-         #ifdef LBANN_DEBUG
-         if(m_comm->am_world_master()) std::cout << " Replacing " << m_weights[j]->get_name() << " with " << new_weights[i]->get_name() << std::endl;
-         #endif
-         dynamic_cast<observer_ptr<data_type_weights<DataType>>>(m_weights[j].get())->set_values(
-           dynamic_cast<data_type_weights<DataType>*>(new_weights[i])->get_values());
-       }
-     }
-   }
+    for (size_t j = 0; j < m_weights.size(); ++j) {
+      //copy only trained weights (that is unfrozen layer)
+      if(m_weights[j]->get_name() == new_weights[i]->get_name() && !new_weights[i]->is_frozen()) {
+#ifdef LBANN_DEBUG
+        if(m_comm->am_world_master()) std::cout << " Replacing " << m_weights[j]->get_name() << " with " << new_weights[i]->get_name() << std::endl;
+#endif
+        dynamic_cast<data_type_weights<DataType>&>(*m_weights[j].get()).set_values(
+          dynamic_cast<data_type_weights<DataType> const&>(*new_weights[i]).get_values());
+      }
+    }
+  }
 }
 
 void model::swap_layers(model& other) {
diff --git a/src/optimizers/adagrad.cpp b/src/optimizers/adagrad.cpp
index 6dc93246dd9..94d76833d2b 100644
--- a/src/optimizers/adagrad.cpp
+++ b/src/optimizers/adagrad.cpp
@@ -94,7 +94,7 @@ void adagrad<TensorDataType>::step_compute_cpu(AbsDistMatrixType& values,
   const size_t cache_ldim = m_cache->LDim();
 
   // Apply AdaGrad step
-  const auto& learning_rate = this->get_learning_rate();
+  const auto learning_rate = El::To<TensorDataType>(this->get_learning_rate());
   LBANN_OMP_PARALLEL_FOR_COLLAPSE2
   for (size_t col = 0; col < local_width; ++col) {
     for (size_t row = 0; row < local_height; ++row) {
diff --git a/src/optimizers/adam.cpp b/src/optimizers/adam.cpp
index ca013be510d..73b5eb7f85a 100644
--- a/src/optimizers/adam.cpp
+++ b/src/optimizers/adam.cpp
@@ -131,9 +131,9 @@ void adam<TensorDataType>::step_compute(AbsDistMatrixType& values,
   // Precompute the bias correction and learning rate.
   m_current_beta1 *= m_beta1;
   m_current_beta2 *= m_beta2;
-  const TensorDataType correction = this->get_learning_rate() *
-                              (El::Sqrt(one - m_current_beta2)
-                               / (one - m_current_beta1));
+  const TensorDataType correction =
+    El::To<TensorDataType>(this->get_learning_rate()) *
+    (El::Sqrt(one - m_current_beta2) / (one - m_current_beta1));
 
   switch (values.GetLocalDevice()) {
   case El::Device::CPU: step_compute_cpu(values, gradient, correction); break;
diff --git a/src/optimizers/data_type_optimizer.cpp b/src/optimizers/data_type_optimizer.cpp
index 93675822789..121a5b58545 100644
--- a/src/optimizers/data_type_optimizer.cpp
+++ b/src/optimizers/data_type_optimizer.cpp
@@ -25,160 +25,11 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 #define LBANN_DATA_TYPE_OPTIMIZER_INSTANTIATE
-#include "lbann/comm_impl.hpp"
 #include "lbann/optimizers/data_type_optimizer.hpp"
 #include "lbann/optimizers/data_type_optimizer_impl.hpp"
-#include "lbann/weights/data_type_weights.hpp"
-#include "lbann/utils/timer.hpp"
-#include "lbann/io/persist.hpp"
-
-namespace lbann {
-
-template <typename TensorDataType>
-data_type_optimizer<TensorDataType>::data_type_optimizer(TensorDataType learning_rate)
-  : m_learning_rate(learning_rate) {}
-
-template <typename TensorDataType>
-data_type_optimizer<TensorDataType>::data_type_optimizer(const data_type_optimizer<TensorDataType>& other)
-  : BaseType(other),
-    m_weights(other.m_weights),
-    m_gradient(other.m_gradient ? other.m_gradient->Copy() : nullptr),
-    m_gradient_v(other.m_gradient_v ? other.m_gradient_v->Copy() : nullptr),
-    m_learning_rate(other.m_learning_rate) {}
-
-template <typename TensorDataType>
-data_type_optimizer<TensorDataType>&
-data_type_optimizer<TensorDataType>::operator=(
-  const data_type_optimizer<TensorDataType>& other) {
-  optimizer::operator=(other);
-  m_weights = other.m_weights;
-  m_gradient.reset(other.m_gradient ? other.m_gradient->Copy() : nullptr);
-  m_gradient_v.reset(other.m_gradient_v ? other.m_gradient_v->Copy() : nullptr);
-  m_learning_rate = other.m_learning_rate;
-  return *this;
-}
-
-template <typename TensorDataType>
-description data_type_optimizer<TensorDataType>::get_description() const {
-  description desc = optimizer::get_description();
-  desc.add("Learning rate", m_learning_rate);
-  return desc;
-}
-
-template <typename TensorDataType>
-auto data_type_optimizer<TensorDataType>::get_weights() -> WeightsType& {
-  // Item 3, p. 23 in "Effective C++", 3rd ed., by Scott Meyers
-  return const_cast<WeightsType&>(static_cast<const data_type_optimizer&>(*this).get_weights());
-}
-
-template <typename TensorDataType>
-auto data_type_optimizer<TensorDataType>::get_weights() const -> const WeightsType& {
-  if (m_weights == nullptr) {
-    LBANN_ERROR("attempted to access the weights being optimized "
-                "before they are set");
-  }
-  return *m_weights;
-}
-
-template <typename TensorDataType>
-auto data_type_optimizer<TensorDataType>::get_gradient() -> AbsDistMatrixType& {
-
-  // Make sure gradient matrix has been setup
-  if (m_gradient == nullptr) {
-    LBANN_ERROR("attempted to access gradient before it is set up");
-  }
-
-  // Make sure gradient values are ready
-  this->start_gradient_allreduce();
-  this->finish_gradient_allreduce();
-
-  // Gather all gradients to the master precision
-  this->accumulate_all_gradient_contributions(*m_gradient);
-
-  // Return gradient
-  return *m_gradient;
-
-}
-
-template <typename TensorDataType>
-void data_type_optimizer<TensorDataType>::setup(weights* w_in)
-{
-  if (auto* w = dynamic_cast<WeightsType*>(w_in))
-    this->setup(w);
-  else
-    LBANN_ERROR("Incompatible weights type.");
-}
-
-template <typename TensorDataType>
-void data_type_optimizer<TensorDataType>::setup(WeightsType* w) {
-  this->setup_base(w);
-}
-
-template <typename TensorDataType>
-void data_type_optimizer<TensorDataType>::setup_base(WeightsType* w) {
-  this->set_comm(w->get_comm());
-  this->clear_gradient();
-
-  // Set weights being optimized
-  if (w != nullptr) { set_weights(w); }
-  if (m_weights == nullptr) {
-    LBANN_ERROR("attempted to setup optimizer without weights");
-  }
-
-  // Initialize matrices
-  const auto& height = m_weights->get_matrix_height();
-  const auto& width = m_weights->get_matrix_width();
-  const AbsDistMatrixType& values = m_weights->get_values();
-  m_gradient.reset(AbsDistMatrixType::Instantiate(values.DistData()));
-  m_gradient->AlignWith(values);
-  m_gradient->Resize(height, width);
-  m_gradient_v.reset(AbsDistMatrixType::Instantiate(values.DistData()));
-  m_gradient_v->AlignWith(values);
-#ifdef HYDROGEN_HAVE_CUB
-  if (m_gradient_v->GetLocalDevice() == El::Device::GPU) {
-    m_gradient_v->Matrix().SetMemoryMode(1); // CUB GPU memory pool
-  }
-#endif // HYDROGEN_HAVE_CUB
-
-}
-
-template <typename TensorDataType>
-TensorDataType data_type_optimizer<TensorDataType>::get_learning_rate() const {
-  return m_learning_rate;
-}
-
-template <typename TensorDataType>
-void data_type_optimizer<TensorDataType>::set_learning_rate(TensorDataType learning_rate) {
-  m_learning_rate = learning_rate;
-}
-
-template <typename TensorDataType>
-void data_type_optimizer<TensorDataType>::step() {
-  if (m_weights == nullptr) {
-    LBANN_ERROR("attempted to perform optimization step without weights");
-  }
-  const auto start_time = get_time();
-  this->step_compute(m_weights->get_values(), this->get_gradient());
-  this->inc_step_time(get_time() - start_time);
-}
-
-template <typename TensorDataType>
-std::tuple<El::Int,El::Int,El::DistData>
-data_type_optimizer<TensorDataType>::get_matrix_info() const {
-  auto const& w = this->get_weights();
-  return {
-    w.get_matrix_height(),
-    w.get_matrix_width(),
-    w.get_matrix_distribution()
-  };
-}
-
-} // namespace lbann
 
 #undef PROTO
-#define PROTO(T)                                                                 \
-  template class lbann::data_type_optimizer<T>
-
+#define PROTO(T) template class lbann::data_type_optimizer<T>
 
 #define LBANN_INSTANTIATE_CPU_HALF
 #define LBANN_INSTANTIATE_GPU_HALF
diff --git a/src/optimizers/hypergradient_adam.cpp b/src/optimizers/hypergradient_adam.cpp
index 60a3dbe2131..fb9ecaa0b0e 100644
--- a/src/optimizers/hypergradient_adam.cpp
+++ b/src/optimizers/hypergradient_adam.cpp
@@ -128,8 +128,8 @@ void hypergradient_adam<TensorDataType>::step_compute(AbsDistMatrixType& values,
 
   // Compute the learning rate update.
   TensorDataType lr_update = El::Dot(gradient, *m_old_gradient);
-  auto learning_rate = this->get_learning_rate();
-  learning_rate += m_hyper_learning_rate * lr_update;
+  auto const learning_rate = El::To<TensorDataType>(this->get_learning_rate()) +
+                             m_hyper_learning_rate * lr_update;
   this->set_learning_rate(learning_rate);
 
   // Hypergradient Adam step
diff --git a/src/optimizers/rmsprop.cpp b/src/optimizers/rmsprop.cpp
index 3c284108049..11b1f26305f 100644
--- a/src/optimizers/rmsprop.cpp
+++ b/src/optimizers/rmsprop.cpp
@@ -101,7 +101,7 @@ void rmsprop<TensorDataType>::step_compute_cpu(AbsDistMatrixType& values,
   const size_t cache_ldim = m_cache->LDim();
 
   // Apply RMSprop step
-  const auto& learning_rate = this->get_learning_rate();
+  const auto learning_rate = El::To<TensorDataType>(this->get_learning_rate());
   LBANN_OMP_PARALLEL_FOR_COLLAPSE2
   for (size_t col = 0; col < local_width; ++col) {
     for (size_t row = 0; row < local_height; ++row) {
diff --git a/src/optimizers/sgd.cpp b/src/optimizers/sgd.cpp
index 0da588b55d6..2c71086775b 100644
--- a/src/optimizers/sgd.cpp
+++ b/src/optimizers/sgd.cpp
@@ -121,7 +121,7 @@ void sgd<TensorDataType>::momentum_step_cpu(AbsDistMatrixType& values,
                                             const AbsDistMatrixType& gradient) {
 
   // Get local matrix data
-  const auto& learning_rate = this->get_learning_rate();
+  const auto learning_rate = El::To<TensorDataType>(this->get_learning_rate());
   const size_t local_height = values.LocalHeight();
   const size_t local_width = values.LocalWidth();
   auto* __restrict__ values_buffer = values.Buffer();

From 2350378b534ab39c0b78b87c32d87475777807b1 Mon Sep 17 00:00:00 2001
From: Soumyadip Ghosh <39674354+soumyadipghosh@users.noreply.github.com>
Date: Tue, 5 Oct 2021 19:31:16 -0400
Subject: [PATCH 15/37] Regularized Evolution Search Algorithm (#1972)

* initial commit on regularized evolution

* refactoring MutationStrategy factory methods - build failing

* add missing headers

* Regularized Evolution works now

* fixing a memory leak

* fix issue with correct winning model and minor comment changes

* removing map of metrics

* Refactoring code and changing logic for selecting winning trainer

* removing redundant code and minor fixes

* clang-format the diff

Co-authored-by: Thomas R. Benson <benson31@llnl.gov>
---
 .../ltfb/mutation_strategy.hpp                |  25 +-
 .../ltfb/regularized_evolution.hpp            | 116 ++++++++
 python/lbann/core/training_algorithm.py       |  36 +++
 src/execution_algorithms/ltfb/CMakeLists.txt  |   1 +
 .../ltfb/checkpoint_common.hpp                |  48 +++
 .../ltfb/meta_learning_strategy.cpp           |   3 +
 .../ltfb/mutation_strategy.cpp                | 101 +++++++
 .../ltfb/random_pairwise_exchange.cpp         |  62 ----
 .../ltfb/regularized_evolution.cpp            | 281 ++++++++++++++++++
 .../ltfb/truncation_selection_exchange.cpp    |  44 +--
 src/proto/training_algorithm.proto            |  17 ++
 11 files changed, 629 insertions(+), 105 deletions(-)
 create mode 100644 include/lbann/execution_algorithms/ltfb/regularized_evolution.hpp
 create mode 100644 src/execution_algorithms/ltfb/regularized_evolution.cpp

diff --git a/include/lbann/execution_algorithms/ltfb/mutation_strategy.hpp b/include/lbann/execution_algorithms/ltfb/mutation_strategy.hpp
index 50209185519..95da70c8daa 100644
--- a/include/lbann/execution_algorithms/ltfb/mutation_strategy.hpp
+++ b/include/lbann/execution_algorithms/ltfb/mutation_strategy.hpp
@@ -26,6 +26,13 @@
 #ifndef LBANN_EXECUTION_ALGORITHMS_LTFB_MUTATION_STRATEGY_HPP_INCLUDED
 #define LBANN_EXECUTION_ALGORITHMS_LTFB_MUTATION_STRATEGY_HPP_INCLUDED
 
+#include "lbann/proto/helpers.hpp"
+#include "lbann/utils/factory.hpp"
+#include "lbann/utils/factory_error_policies.hpp"
+#include "lbann/utils/make_abstract.hpp"
+
+#include <google/protobuf/message.h>
+
 #include "lbann/models/model.hpp"
 #include "lbann/utils/cloneable.hpp"
 
@@ -64,13 +71,29 @@ class ReplaceActivation final
 };
 
 // Replace Convolution layers
-class ReplaceConvolution final : public Cloneable<ReplaceConvolution, MutationStrategy>
+class ReplaceConvolution final
+  : public Cloneable<ReplaceConvolution, MutationStrategy>
 {
 public:
   ReplaceConvolution() = default;
   void mutate(model& m, const int& step) final;
 };
 
+// Hybrid mutation strategy, intended for use with Regularized Evolution.
+// Each mutate() call randomly applies ReplaceActivation or ReplaceConvolution.
+class HybridMutation final : public Cloneable<HybridMutation, MutationStrategy>
+{
+public:
+  HybridMutation() = default;
+  void mutate(model& m, const int& step) final;
+};
+
 } // namespace ltfb
 } // namespace lbann
+
+template <>
+std::unique_ptr<lbann::ltfb::MutationStrategy>
+lbann::make_abstract<lbann::ltfb::MutationStrategy>(
+  google::protobuf::Message const& params);
+
 #endif // LBANN_EXECUTION_ALGORITHMS_LTFB_MUTATION_STRATEGY_HPP_INCLUDED
diff --git a/include/lbann/execution_algorithms/ltfb/regularized_evolution.hpp b/include/lbann/execution_algorithms/ltfb/regularized_evolution.hpp
new file mode 100644
index 00000000000..bb9c2665d59
--- /dev/null
+++ b/include/lbann/execution_algorithms/ltfb/regularized_evolution.hpp
@@ -0,0 +1,116 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+#ifndef LBANN_EXECUTION_ALGORITHMS_LTFB_REGULARIZED_EVOLUTION_HPP_INCLUDED
+#define LBANN_EXECUTION_ALGORITHMS_LTFB_REGULARIZED_EVOLUTION_HPP_INCLUDED
+
+#include "mutation_strategy.hpp"
+
+#include "meta_learning_strategy.hpp"
+
+#include <google/protobuf/message.h>
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+namespace lbann {
+namespace ltfb {
+
+/** @class RegularizedEvolution
+ *  This is a meta-learning strategy in population-based training.
+ *  A sample of trainers is chosen from a population in every tournament.
+ *  The best trainer is chosen from that sample according to an evaluation
+ * metric. Then the model from that best trainer is mutated and replaces the
+ * oldest model.
+ */
+
+class RegularizedEvolution final
+  : public Cloneable<RegularizedEvolution, MetaLearningStrategy>
+{
+public:
+  enum class metric_strategy
+  {
+    LOWER_IS_BETTER,
+    HIGHER_IS_BETTER,
+  }; // enum class metric_strategy
+
+public:
+  RegularizedEvolution(std::string metric_name,
+                       metric_strategy winner_strategy,
+                       std::unique_ptr<MutationStrategy> mutate_algo,
+                       int sample_size);
+  ~RegularizedEvolution() = default;
+  RegularizedEvolution(RegularizedEvolution const& other);
+
+  void select_next(model& m,
+                   ltfb::ExecutionContext& ctxt,
+                   data_coordinator& dc) const final;
+
+private:
+  /** @brief Get the value of the given metric from the model. */
+  EvalType
+  evaluate_model(model& m, ExecutionContext& ctxt, data_coordinator& dc) const;
+
+private:
+  /** @brief The strategy for mutation of a model
+   *
+   *  In each tournament, the model of the best trainer in the sample
+   *  is mutated with this strategy, and the mutated model replaces the
+   *  oldest model in the population. This is relevant to neural
+   *  architecture search (NAS).
+   */
+  std::unique_ptr<MutationStrategy> m_mutate_algo;
+
+  /** @brief Name of the metric for evaluation
+   */
+  std::string m_metric_name;
+
+  /** @brief Strategy to consider for evaluating the metric
+   *  e.g., HIGHER_IS_BETTER or LOWER_IS_BETTER
+   */
+  metric_strategy m_metric_strategy;
+
+  /** @brief The size of the sample to choose from the population in every step
+   */
+  int m_sample_size;
+
+}; // class RegularizedEvolution
+
+} // namespace ltfb
+
+/** @name Builder functions */
+///@{
+
+/** @brief Concrete product builder for RegularizedEvolution. */
+template <>
+std::unique_ptr<ltfb::RegularizedEvolution>
+make(google::protobuf::Message const&);
+
+///@}
+
+} // namespace lbann
+#endif // LBANN_EXECUTION_ALGORITHMS_LTFB_REGULARIZED_EVOLUTION_HPP_INCLUDED
diff --git a/python/lbann/core/training_algorithm.py b/python/lbann/core/training_algorithm.py
index 5df812b583a..4b86e9918ea 100644
--- a/python/lbann/core/training_algorithm.py
+++ b/python/lbann/core/training_algorithm.py
@@ -212,6 +212,9 @@ def export_proto(self):
         elif self.strategy == "replace_convolution":
             ReplaceConvolutionMsg = MutationStrategyMsg.ReplaceConvolution
             msg.replace_convolution.CopyFrom(ReplaceConvolutionMsg())
+        elif self.strategy == "hybrid_mutation":
+            HybridMutationMsg = MutationStrategyMsg.HybridMutation
+            msg.hybrid_mutation.CopyFrom(HybridMutationMsg())
         else:
             raise ValueError("Unknown Strategy")
         return msg
@@ -398,6 +401,39 @@ def export_proto(self):
         msg.truncation_k = self.truncation_k
         return msg
 
+class RegularizedEvolution(MetaLearningStrategy):
+    """Regularized-evolution strategy for population-based training.
+
+    Every tournament samples trainers from the population and picks the best
+    by an evaluation metric; its mutated model then replaces the oldest model.
+    """
+
+    class MetricStrategy:
+        LOWER_IS_BETTER: int = 0
+        HIGHER_IS_BETTER: int = 1
+
+    def __init__(self,
+                 metric_name,
+                 metric_strategy,
+                 mutation_strategy = None,
+                 sample_size = 0):
+        # None -> fresh default; a signature default would be one shared object.
+        if mutation_strategy is None:
+            mutation_strategy = MutationStrategy()
+        self.metric_name = metric_name
+        self.metric_strategy = metric_strategy
+        self.mutation_strategy = mutation_strategy
+        self.sample_size = sample_size
+
+    def export_proto(self):
+        """Get a protobuf representation of this object."""
+        msg = AlgoProto.RegularizedEvolution()
+        msg.metric_name = self.metric_name
+        msg.metric_strategy = self.metric_strategy
+        msg.mutation_strategy.CopyFrom(self.mutation_strategy.export_proto())
+        msg.sample_size = self.sample_size
+        return msg
+
 class KFAC(TrainingAlgorithm):
     """Kronecker-Factored Approximate Curvature algorithm.
 
diff --git a/src/execution_algorithms/ltfb/CMakeLists.txt b/src/execution_algorithms/ltfb/CMakeLists.txt
index 1a0b907251e..649c7ff6f91 100644
--- a/src/execution_algorithms/ltfb/CMakeLists.txt
+++ b/src/execution_algorithms/ltfb/CMakeLists.txt
@@ -5,6 +5,7 @@ set_full_path(THIS_DIR_SOURCES
   meta_learning_strategy.cpp
   mutation_strategy.cpp
   random_pairwise_exchange.cpp
+  regularized_evolution.cpp
   sendrecv_weights.cpp
   truncation_selection_exchange.cpp
   )
diff --git a/src/execution_algorithms/ltfb/checkpoint_common.hpp b/src/execution_algorithms/ltfb/checkpoint_common.hpp
index 1c3ff82064b..e9cc5f348ec 100644
--- a/src/execution_algorithms/ltfb/checkpoint_common.hpp
+++ b/src/execution_algorithms/ltfb/checkpoint_common.hpp
@@ -33,6 +33,54 @@
 
 namespace lbann {
 namespace ltfb {
+namespace {
+
+// Serialize the model into a binary string (rooted archive on trainer grid)
+std::string pack(model const& m)
+{
+  std::ostringstream oss;
+  { // the archive is destroyed at the closing brace, before oss.str() is read
+    RootedBinaryOutputArchive ar(oss, m.get_comm()->get_trainer_grid());
+    ar(m);
+  }
+  return oss.str();
+}
+
+// Send a string (its length, then its bytes) to rank 0 of destination_trainer
+void send_string(lbann_comm const& comm,
+                 std::string const& str,
+                 int destination_trainer)
+{
+  size_t size = str.length();
+  comm.send(&size, 1, destination_trainer, /*rank=*/0);
+  comm.send(reinterpret_cast<El::byte const*>(str.data()),
+            size,
+            destination_trainer,
+            /*rank=*/0);
+}
+
+// Receive a string (length, then bytes) from the root of src_trainer
+std::string recv_string(lbann_comm const& comm, int src_trainer)
+{
+  size_t size = 0;
+  comm.recv(&size, 1, src_trainer);
+  std::string buf;
+  buf.resize(size); // make room so the payload can be received in place
+  comm.recv(reinterpret_cast<El::byte*>(buf.data()), size, src_trainer);
+  return buf;
+}
+
+// Deserialize the binary string into m in place (counterpart of pack)
+void unpack(model& m, std::string const& str)
+{
+  std::istringstream iss(str);
+  {
+    RootedBinaryInputArchive ar(iss, m.get_comm()->get_trainer_grid());
+    ar(m);
+  }
+}
+
+} // namespace
 
 inline static void restore_model_weights(
   model& m,
diff --git a/src/execution_algorithms/ltfb/meta_learning_strategy.cpp b/src/execution_algorithms/ltfb/meta_learning_strategy.cpp
index 998c9b6b17b..a55e38ea8ee 100644
--- a/src/execution_algorithms/ltfb/meta_learning_strategy.cpp
+++ b/src/execution_algorithms/ltfb/meta_learning_strategy.cpp
@@ -25,6 +25,7 @@
 ////////////////////////////////////////////////////////////////////////////////
 #include "lbann/execution_algorithms/ltfb/meta_learning_strategy.hpp"
 #include "lbann/execution_algorithms/ltfb/random_pairwise_exchange.hpp"
+#include "lbann/execution_algorithms/ltfb/regularized_evolution.hpp"
 #include "lbann/execution_algorithms/ltfb/truncation_selection_exchange.hpp"
 #include "lbann/proto/helpers.hpp"
 #include "lbann/utils/make_abstract.hpp"
@@ -42,6 +43,8 @@ lbann::ltfb::MetaLearningStrategyFactory build_default_factory()
                            lbann::make<RandomPairwiseExchange>);
   factory.register_builder("TruncationSelectionExchange",
                            lbann::make<TruncationSelectionExchange>);
+  factory.register_builder("RegularizedEvolution",
+                           lbann::make<RegularizedEvolution>);
   return factory;
 }
 
diff --git a/src/execution_algorithms/ltfb/mutation_strategy.cpp b/src/execution_algorithms/ltfb/mutation_strategy.cpp
index 7f5e3997cb2..a207068fe26 100644
--- a/src/execution_algorithms/ltfb/mutation_strategy.cpp
+++ b/src/execution_algorithms/ltfb/mutation_strategy.cpp
@@ -26,7 +26,14 @@
 
 #include "lbann/execution_algorithms/ltfb/mutation_strategy.hpp"
 
+#include "lbann/base.hpp"
 #include "lbann/comm_impl.hpp"
+#include "lbann/proto/helpers.hpp"
+#include "lbann/utils/exception.hpp"
+#include "lbann/utils/factory.hpp"
+#include "lbann/utils/memory.hpp"
+
+#include <training_algorithm.pb.h>
 
 #include "lbann/layers/activations/elu.hpp"
 #include "lbann/layers/activations/leaky_relu.hpp"
@@ -317,5 +324,99 @@ void ReplaceConvolution::mutate(model& m, const int& step)
   }
 }
 
+void HybridMutation::mutate(model& m, const int& step)
+{
+  // The trainer master draws the choice and broadcasts it so that every rank
+  // in the trainer applies the same mutation.
+  int mutation_choice = 0; // 0 - ReplaceActivation, 1 - ReplaceConvolution
+
+  if (m.get_comm()->am_trainer_master()) {
+    mutation_choice = fast_rand_int(get_fast_generator(), 2); // either 0 or 1
+  }
+  m.get_comm()->trainer_broadcast(m.get_comm()->get_trainer_master(),
+                                  mutation_choice);
+
+  if (mutation_choice == 0) {
+    ReplaceActivation().mutate(m, step);
+  }
+  else {
+    ReplaceConvolution().mutate(m, step);
+  }
+}
+
 } // namespace ltfb
 } // namespace lbann
+
+namespace {
+
+using MutationStrategyFactory = lbann::generic_factory<
+  lbann::ltfb::MutationStrategy,
+  std::string,
+  lbann::proto::generate_builder_type<lbann::ltfb::MutationStrategy,
+                                      google::protobuf::Message const&>>;
+
+std::unique_ptr<lbann::ltfb::NullMutation>
+make_null_mutation(google::protobuf::Message const& msg)
+{
+  using NullMutation = lbann_data::MutationStrategy::NullMutation;
+  LBANN_ASSERT(dynamic_cast<NullMutation const*>(&msg));
+  return std::make_unique<lbann::ltfb::NullMutation>();
+}
+
+std::unique_ptr<lbann::ltfb::ReplaceActivation>
+make_replace_activation(google::protobuf::Message const& msg)
+{
+  using ReplaceActivation = lbann_data::MutationStrategy::ReplaceActivation;
+  LBANN_ASSERT(dynamic_cast<ReplaceActivation const*>(&msg));
+  return std::make_unique<lbann::ltfb::ReplaceActivation>();
+}
+
+std::unique_ptr<lbann::ltfb::ReplaceConvolution>
+make_replace_convolution(google::protobuf::Message const& msg)
+{
+  using ReplaceConvolution = lbann_data::MutationStrategy::ReplaceConvolution;
+  LBANN_ASSERT(dynamic_cast<ReplaceConvolution const*>(&msg));
+  return std::make_unique<lbann::ltfb::ReplaceConvolution>();
+}
+
+std::unique_ptr<lbann::ltfb::HybridMutation>
+make_hybrid_mutation(google::protobuf::Message const& msg)
+{
+  using HybridMutation = lbann_data::MutationStrategy::HybridMutation;
+  LBANN_ASSERT(dynamic_cast<HybridMutation const*>(&msg));
+  return std::make_unique<lbann::ltfb::HybridMutation>();
+}
+
+MutationStrategyFactory build_default_mutation_factory()
+{
+  MutationStrategyFactory factory;
+  factory.register_builder("NullMutation", make_null_mutation);
+  factory.register_builder("ReplaceActivation", make_replace_activation);
+  factory.register_builder("ReplaceConvolution", make_replace_convolution);
+  factory.register_builder("HybridMutation", make_hybrid_mutation);
+  return factory;
+}
+
+MutationStrategyFactory& get_mutation_factory()
+{
+  static MutationStrategyFactory factory = build_default_mutation_factory();
+  return factory;
+}
+
+} // namespace
+
+// Build the MutationStrategy selected by the "strategy" oneof of the message.
+template <>
+std::unique_ptr<lbann::ltfb::MutationStrategy>
+lbann::make_abstract<lbann::ltfb::MutationStrategy>(
+  const google::protobuf::Message& msg)
+{
+  using ProtoStrategy = lbann_data::MutationStrategy;
+  auto const& params = dynamic_cast<ProtoStrategy const&>(msg);
+
+  auto const& mutate_params =
+    proto::helpers::get_oneof_message(params, "strategy");
+  return get_mutation_factory().create_object(
+    proto::helpers::message_type(mutate_params),
+    mutate_params);
+}
diff --git a/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp b/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp
index acb3687b7cc..ca31cab5d47 100644
--- a/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp
+++ b/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp
@@ -426,52 +426,6 @@ ExchangeStrategyFactory& get_exchange_factory()
   return factory;
 }
 
-using MutationStrategyFactory = lbann::generic_factory<
-  lbann::ltfb::MutationStrategy,
-  std::string,
-  lbann::proto::generate_builder_type<
-    lbann::ltfb::MutationStrategy,
-    google::protobuf::Message const&>>;
-
-std::unique_ptr<lbann::ltfb::NullMutation>
-make_null_mutation(google::protobuf::Message const& msg)
-{
-  using NullMutation = lbann_data::MutationStrategy::NullMutation;
-  LBANN_ASSERT(dynamic_cast<NullMutation const*>(&msg));
-  return std::make_unique<lbann::ltfb::NullMutation>();
-}
-
-std::unique_ptr<lbann::ltfb::ReplaceActivation>
-make_replace_activation(google::protobuf::Message const& msg)
-{
-  using ReplaceActivation = lbann_data::MutationStrategy::ReplaceActivation;
-  LBANN_ASSERT(dynamic_cast<ReplaceActivation const*>(&msg));
-  return std::make_unique<lbann::ltfb::ReplaceActivation>();
-}
-
-std::unique_ptr<lbann::ltfb::ReplaceConvolution>
-make_replace_convolution(google::protobuf::Message const& msg)
-{
-  using ReplaceConvolution = lbann_data::MutationStrategy::ReplaceConvolution;
-  LBANN_ASSERT(dynamic_cast<ReplaceConvolution const*>(&msg));
-  return std::make_unique<lbann::ltfb::ReplaceConvolution>();
-}
-
-MutationStrategyFactory build_default_mutation_factory()
-{
-  MutationStrategyFactory factory;
-  factory.register_builder("NullMutation", make_null_mutation);
-  factory.register_builder("ReplaceActivation", make_replace_activation);
-  factory.register_builder("ReplaceConvolution", make_replace_convolution);
-  return factory;
-}
-
-MutationStrategyFactory& get_mutation_factory()
-{
-  static MutationStrategyFactory factory = build_default_mutation_factory();
-  return factory;
-}
-
 } // namespace
 
 // For ExchangeStrategy
@@ -496,22 +450,6 @@ lbann::make_abstract<lbann::ltfb::RandomPairwiseExchange::ExchangeStrategy>(
     exchange_params);
 }
 
-// For MutationStrategy
-template <>
-std::unique_ptr<lbann::ltfb::MutationStrategy>
-lbann::make_abstract<lbann::ltfb::MutationStrategy>(
-  const google::protobuf::Message& msg)
-{
-  using ProtoStrategy = lbann_data::MutationStrategy;
-  auto const& params = dynamic_cast<ProtoStrategy const&>(msg);
-
-  auto const& mutate_params =
-    proto::helpers::get_oneof_message(params, "strategy");
-  return get_mutation_factory().create_object(
-    proto::helpers::message_type(mutate_params),
-    mutate_params);
-}
-
 template <>
 std::unique_ptr<lbann::ltfb::RandomPairwiseExchange>
 lbann::make<lbann::ltfb::RandomPairwiseExchange>(
diff --git a/src/execution_algorithms/ltfb/regularized_evolution.cpp b/src/execution_algorithms/ltfb/regularized_evolution.cpp
new file mode 100644
index 00000000000..2d94ce122d3
--- /dev/null
+++ b/src/execution_algorithms/ltfb/regularized_evolution.cpp
@@ -0,0 +1,281 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/execution_algorithms/ltfb/mutation_strategy.hpp"
+
+#include "lbann/execution_algorithms/ltfb/regularized_evolution.hpp"
+
+#include "checkpoint_common.hpp"
+
+#include "lbann/base.hpp"
+#include "lbann/comm_impl.hpp"
+#include "lbann/data_coordinator/data_coordinator.hpp"
+#include "lbann/models/directed_acyclic_graph.hpp"
+#include "lbann/models/model.hpp"
+#include "lbann/proto/helpers.hpp"
+#include "lbann/trainers/trainer.hpp"
+#include "lbann/utils/exception.hpp"
+#include "lbann/utils/memory.hpp"
+#include "lbann/weights/data_type_weights_impl.hpp"
+
+#include <training_algorithm.pb.h>
+
+#include <algorithm>
+#include <iterator>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace lbann {
+namespace ltfb {
+
+// RegularizedEvolution Implementation
+
+// Stores the tournament settings and takes ownership of mutate_algo.
+RegularizedEvolution::RegularizedEvolution(
+  std::string metric_name,
+  metric_strategy winner_strategy,
+  std::unique_ptr<MutationStrategy> mutate_algo,
+  int sample_size)
+  : m_mutate_algo{std::move(mutate_algo)},
+    m_metric_name{std::move(metric_name)},
+    m_metric_strategy{winner_strategy}, m_sample_size{sample_size}
+{}
+
+// Copy `rhs`; the mutation strategy is obtained through its clone() method.
+RegularizedEvolution::RegularizedEvolution(RegularizedEvolution const& rhs)
+  : m_mutate_algo{rhs.m_mutate_algo->clone()},
+    m_metric_name{rhs.m_metric_name},
+    m_metric_strategy{rhs.m_metric_strategy}, m_sample_size{rhs.m_sample_size}
+{}
+
+// Evaluate `m` in tournament mode and return the mean value of the metric
+// named m_metric_name; raises an LBANN error if the model has no such metric.
+EvalType RegularizedEvolution::evaluate_model(model& m,
+                                              ExecutionContext& ctxt,
+                                              data_coordinator& dc) const
+{
+  constexpr auto eval_mode = execution_mode::tournament;
+
+  // Make sure data readers finish asynchronous work
+  auto const saved_mode = ctxt.get_execution_mode();
+  dc.collect_background_data_fetch(saved_mode);
+
+  // A valid tournament execution mode is mandatory for this strategy
+  if (!dc.is_execution_mode_valid(eval_mode)) {
+    LBANN_ERROR("Regularized Evolution requires ",
+                to_string(eval_mode),
+                " execution mode");
+  }
+
+  // Mark the data store as loading - Note that this is a temporary fix
+  // for the current use of the tournament
+  m.mark_data_store_explicitly_loading(eval_mode);
+
+  // Evaluate model on the tournament set
+  get_trainer().evaluate(&m, eval_mode);
+
+  // Locate the requested metric by name
+  auto&& metrics = m.get_metrics();
+  auto const match =
+    std::find_if(std::begin(metrics),
+                 std::end(metrics),
+                 [this](auto const& met) {
+                   return met->name() == m_metric_name;
+                 });
+
+  // sanity check
+  if (match == std::end(metrics)) {
+    LBANN_ERROR("could not find metric \"",
+                m_metric_name,
+                "\" "
+                "in model \"",
+                m.get_name(),
+                "\"");
+  }
+  EvalType const score = (*match)->get_mean_value(eval_mode);
+
+  m.make_data_store_preloaded(eval_mode);
+
+  // Clean up and return metric score
+  m.reset_mode(ctxt, saved_mode);
+  dc.reset_mode(ctxt);
+  return score;
+}
+
+// Run one regularized-evolution step. A random sample of trainers competes on
+// the tournament metric; the winner's model replaces the model of the "oldest"
+// trainer (step % num_trainers), which is then mutated and set up again.
+void RegularizedEvolution::select_next(model& m,
+                                       ltfb::ExecutionContext& ctxt,
+                                       data_coordinator& dc) const
+{
+  auto const& comm = *(m.get_comm());
+  const unsigned int num_trainers = comm.get_num_trainers();
+  const int trainer_id = comm.get_trainer_rank();
+  auto const step = ctxt.get_step();
+
+  // The first m_sample_size shuffled trainer ids form the sample, so the
+  // sample size has to index safely into the per-trainer arrays below.
+  if (m_sample_size < 1 ||
+      static_cast<unsigned int>(m_sample_size) > num_trainers) {
+    LBANN_ERROR("Regularized Evolution sample size (", m_sample_size,
+                ") must be between 1 and the number of trainers (",
+                num_trainers, ")");
+  }
+
+  std::vector<unsigned> sample_trainers(num_trainers);
+  if (comm.am_world_master()) {
+    std::iota(begin(sample_trainers), end(sample_trainers), 0U);
+    std::shuffle(begin(sample_trainers), end(sample_trainers),
+                 get_ltfb_generator());
+
+    // Print trainers selected in sample
+    std::cout << "Trainers in sample at step " << step << " -";
+    for (int i = 0; i < m_sample_size; i++)
+      std::cout << " " << sample_trainers[i];
+    std::cout << std::endl;
+  }
+  comm.world_broadcast(comm.get_world_master(), sample_trainers.data(),
+                       num_trainers);
+
+  // Keep the score as EvalType: an integer would truncate fractional metrics
+  EvalType score = evaluate_model(m, ctxt, dc);
+
+  // AllGather scores from all trainers
+  std::vector<EvalType> score_list_all(num_trainers);
+  comm.trainer_barrier();
+  if (comm.am_trainer_master()) {
+    comm.all_gather<EvalType>(score, score_list_all,
+                              comm.get_intertrainer_comm());
+  }
+
+  // Use scores only for samples selected from sample_trainers above
+  // and place them in the same order as in sample_trainers
+  std::vector<EvalType> score_list_samples(m_sample_size);
+  for (int i = 0; i < m_sample_size; i++) {
+    score_list_samples[i] = score_list_all[sample_trainers[i]];
+  }
+
+  // Communicate sample score list from trainer master to other procs in trainer
+  comm.trainer_broadcast(comm.get_trainer_master(), score_list_samples.data(),
+                         m_sample_size);
+
+  // Find winning trainer in sample according to metric strategy
+  auto const first = score_list_samples.cbegin();
+  auto const last = score_list_samples.cend();
+  auto best = last;
+  if (m_metric_strategy == metric_strategy::HIGHER_IS_BETTER)
+    best = std::max_element(first, last);
+  else if (m_metric_strategy == metric_strategy::LOWER_IS_BETTER)
+    best = std::min_element(first, last);
+  else
+    LBANN_ERROR("Invalid metric strategy!");
+  const El::Int winner_id = sample_trainers[std::distance(first, best)];
+
+  // Find oldest trainer - cycle through trainer ids
+  const El::Int oldest_id = step % num_trainers;
+
+  // Print winning and oldest model
+  if (comm.am_world_master()) {
+    std::cout << "Winner - " << winner_id << ", Oldest - " << oldest_id
+              << std::endl;
+  }
+
+  // Winner side: every rank packs, the trainer master sends
+  if (trainer_id == winner_id && winner_id != oldest_id) {
+    auto model_string = pack(m);
+    if (comm.am_trainer_master()) {
+      send_string(comm, model_string, oldest_id);
+      std::cout << "In Reg Evo step " << step << ", trainer " << trainer_id
+                << " with score " << score_list_all[trainer_id]
+                << " sends model to trainer " << oldest_id << std::endl;
+    }
+  }
+
+  if (trainer_id == oldest_id) {
+    // Oldest side: the trainer master receives, every rank unpacks
+    if (winner_id != oldest_id) {
+      std::string rcv_str;
+      if (comm.am_trainer_master()) {
+        rcv_str = recv_string(comm, winner_id);
+        std::cout << "In Reg Evo step " << step << ", trainer " << trainer_id
+                  << " receives model from trainer " << winner_id << std::endl;
+      }
+      unpack(m, rcv_str);
+    }
+
+    // Mutating oldest model
+    m_mutate_algo->mutate(m, step);
+
+    auto& trainer = get_trainer();
+    auto&& metadata = trainer.get_data_coordinator().get_dr_metadata();
+    m.setup(trainer.get_max_mini_batch_size(), metadata, /*force*/ true);
+  }
+}
+
+} // namespace ltfb
+} // namespace lbann
+
+namespace {
+
+// Map the protobuf MetricStrategy enum onto the LBANN-side enum.
+lbann::ltfb::RegularizedEvolution::metric_strategy
+to_lbann(lbann_data::RegularizedEvolution::MetricStrategy strategy)
+{
+  using LBANNEnumType = lbann::ltfb::RegularizedEvolution::metric_strategy;
+  using ProtoEnumType = lbann_data::RegularizedEvolution::MetricStrategy;
+  if (strategy ==
+      ProtoEnumType::RegularizedEvolution_MetricStrategy_HIGHER_IS_BETTER)
+    return LBANNEnumType::HIGHER_IS_BETTER;
+  if (strategy !=
+      ProtoEnumType::RegularizedEvolution_MetricStrategy_LOWER_IS_BETTER)
+    LBANN_ERROR("Unknown enum value: ", static_cast<int>(strategy));
+  // LOWER_IS_BETTER; also the fallthrough after LBANN_ERROR (presumably throws).
+  return LBANNEnumType::LOWER_IS_BETTER;
+}
+
+} // namespace
+
+// Builder: unpack the Any-wrapped RegularizedEvolution message and construct.
+template <>
+std::unique_ptr<lbann::ltfb::RegularizedEvolution>
+lbann::make<lbann::ltfb::RegularizedEvolution>(
+  google::protobuf::Message const& msg_in)
+{
+  using StrategyType = lbann::ltfb::RegularizedEvolution;
+  auto const& any_msg = dynamic_cast<google::protobuf::Any const&>(msg_in);
+  lbann_data::RegularizedEvolution proto;
+  LBANN_ASSERT(any_msg.UnpackTo(&proto));
+
+  return make_unique<StrategyType>(
+    proto.metric_name(),
+    to_lbann(proto.metric_strategy()),
+    make_abstract<lbann::ltfb::MutationStrategy>(proto.mutation_strategy()),
+    proto.sample_size());
+}
diff --git a/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp b/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp
index a5e433df065..b1877f13d10 100644
--- a/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp
+++ b/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp
@@ -26,6 +26,8 @@
 
 #include "lbann/execution_algorithms/ltfb/truncation_selection_exchange.hpp"
 
+#include "checkpoint_common.hpp"
+
 #include "lbann/base.hpp"
 #include "lbann/comm_impl.hpp"
 #include "lbann/data_coordinator/data_coordinator.hpp"
@@ -65,48 +67,6 @@ bool low_score_wins(TruncationSelectionExchange::metric_strategy strategy)
   return true; // Silence compiler warning about no return.
 }
 
-// Pack model to ship off
-std::string pack(model const& m)
-{
-  std::ostringstream oss;
-  {
-    RootedBinaryOutputArchive ar(oss, m.get_comm()->get_trainer_grid());
-    ar(m);
-  }
-  return oss.str();
-}
-
-// Send a string to the root of the destination trainer
-void send_string(lbann_comm const& comm,
-                 std::string const& str,
-                 int destination_trainer)
-{
-  size_t size = str.length();
-  comm.send(&size, 1, destination_trainer, /*rank=*/0);
-  comm.send(reinterpret_cast<El::byte const*>(str.data()),
-            size,
-            destination_trainer,
-            /*rank=*/0);
-}
-// Receive a string from the root of src_trainer
-std::string recv_string(lbann_comm const& comm, int src_trainer)
-{
-  size_t size = 0;
-  comm.recv(&size, 1, src_trainer);
-  std::string buf;
-  buf.resize(size);
-  comm.recv(reinterpret_cast<El::byte*>(buf.data()), size, src_trainer);
-  return buf;
-}
-// Unpack received model
-void unpack(model& m, std::string const& str)
-{
-  std::istringstream iss(str);
-  {
-    RootedBinaryInputArchive ar(iss, m.get_comm()->get_trainer_grid());
-    ar(m);
-  }
-}
 } // namespace
 
 // TruncationSelectionExchange implementation
diff --git a/src/proto/training_algorithm.proto b/src/proto/training_algorithm.proto
index 77d0dbc3e73..8c25b92d43b 100644
--- a/src/proto/training_algorithm.proto
+++ b/src/proto/training_algorithm.proto
@@ -76,10 +76,14 @@ message MutationStrategy {
   message ReplaceConvolution {
   }
 
+  message HybridMutation {
+  }
+
   oneof strategy {
     NullMutation null_mutation = 1;
     ReplaceActivation replace_activation = 2;
     ReplaceConvolution replace_convolution = 3;
+    HybridMutation hybrid_mutation = 4;
   }
 } //message Mutation Strategy
 
@@ -127,6 +131,19 @@ message TruncationSelectionExchange {
   uint64 truncation_k = 2; //what should be default, 1?
 }// message TruncationSelectionExchange
 
+// Regularized Evolution strategy. Implements MetaLearningStrategy.
+message RegularizedEvolution {
+  enum MetricStrategy {
+    LOWER_IS_BETTER = 0;  // sampled trainer with the smallest metric wins
+    HIGHER_IS_BETTER = 1; // sampled trainer with the largest metric wins
+  }
+
+  string metric_name = 1;                 // name of the model metric compared
+  MetricStrategy metric_strategy = 2;     // how metric values are ranked
+  MutationStrategy mutation_strategy = 3; // mutation applied to the oldest model
+  uint64 sample_size = 4;                 // number of trainers sampled per step
+}// message RegularizedEvolution
+
 message KFAC {
 
   SGD sgd = 1;

From 3214f189a1438565d695542e076c4fa8e7332d34 Mon Sep 17 00:00:00 2001
From: Michael Wyatt <wyatt5@llnl.gov>
Date: Thu, 7 Oct 2021 16:00:19 -0700
Subject: [PATCH 16/37] Miopen Pooling and Convolution Bug Fixes (#1982)

* fixed convolution workspace and added path for beta==1

* added fix for pooling index not matching tensor index

* changes to arbitrary alpha and beta values pathway for backward convolution filter
---
 .../utils/dnn_lib/miopen/convolution.hpp      | 49 ++++++++++++++-----
 .../lbann/utils/dnn_lib/miopen/pooling.hpp    |  2 +
 src/layers/learning/base_convolution.cpp      | 24 ++++++---
 3 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/include/lbann/utils/dnn_lib/miopen/convolution.hpp b/include/lbann/utils/dnn_lib/miopen/convolution.hpp
index ccffb5868b1..f460cdf0eaa 100644
--- a/include/lbann/utils/dnn_lib/miopen/convolution.hpp
+++ b/include/lbann/utils/dnn_lib/miopen/convolution.hpp
@@ -268,19 +268,42 @@ void convolution_backward_filter(
   auto handle_manager = internal::make_default_handle_manager(si);
   auto alpha = El::To<LibScalingParamT>(alpha_in);
   auto beta = El::To<LibScalingParamT>(beta_in);
-  CHECK_MIOPEN(miopenConvolutionBackwardWeights(handle_manager.get(),
-                                                &alpha,
-                                                dyDesc,
-                                                dy.LockedBuffer(),
-                                                xDesc,
-                                                x.LockedBuffer(),
-                                                convDesc,
-                                                miopen::to_miopen(alg),
-                                                &beta,
-                                                dwDesc,
-                                                dw.Buffer(),
-                                                workSpace.Buffer(),
-                                                workSpace.Height()*sizeof(TensorDataType)));
+  auto one = El::TypeTraits<LibScalingParamT>::One();
+  auto zero = El::TypeTraits<LibScalingParamT>::Zero();
+  El::Matrix<TensorDataType, El::Device::GPU> dw_old;
+
+  if (alpha_in != El::TypeTraits<LibScalingParamT>::One() ||
+      beta_in != El::TypeTraits<LibScalingParamT>::Zero()) {
+    El::Copy(dw, dw_old);
+    CHECK_MIOPEN(miopenConvolutionBackwardWeights(handle_manager.get(),
+                                                  &one,
+                                                  dyDesc,
+                                                  dy.LockedBuffer(),
+                                                  xDesc,
+                                                  x.LockedBuffer(),
+                                                  convDesc,
+                                                  miopen::to_miopen(alg),
+                                                  &zero,
+                                                  dwDesc,
+                                                  dw.Buffer(),
+                                                  workSpace.Buffer(),
+                                                  workSpace.Height()*sizeof(TensorDataType)));
+    add_tensor(alpha_in, dwDesc, dw, beta_in, dwDesc, dw_old);
+  } else {
+    CHECK_MIOPEN(miopenConvolutionBackwardWeights(handle_manager.get(),
+                                                  &alpha,
+                                                  dyDesc,
+                                                  dy.LockedBuffer(),
+                                                  xDesc,
+                                                  x.LockedBuffer(),
+                                                  convDesc,
+                                                  miopen::to_miopen(alg),
+                                                  &beta,
+                                                  dwDesc,
+                                                  dw.Buffer(),
+                                                  workSpace.Buffer(),
+                                                  workSpace.Height()*sizeof(TensorDataType)));
+  }
 }
 
 template <typename TensorDataType, typename ScalarParameterType>
diff --git a/include/lbann/utils/dnn_lib/miopen/pooling.hpp b/include/lbann/utils/dnn_lib/miopen/pooling.hpp
index 0ca8454e535..acaee1717eb 100644
--- a/include/lbann/utils/dnn_lib/miopen/pooling.hpp
+++ b/include/lbann/utils/dnn_lib/miopen/pooling.hpp
@@ -44,6 +44,8 @@ using namespace miopen;
 inline size_t get_pooling_ws_size(PoolingDescriptor const& poolingDesc,
                                   TensorDescriptor const& yDesc)
 {
+  CHECK_MIOPEN(miopenSetPoolingIndexType(poolingDesc,
+                                         miopenIndexUint32));
   size_t size;
   CHECK_MIOPEN(miopenPoolingGetWorkSpaceSizeV2(poolingDesc,
                                                yDesc,
diff --git a/src/layers/learning/base_convolution.cpp b/src/layers/learning/base_convolution.cpp
index 0aa07f89a2e..d2fcaefd48b 100644
--- a/src/layers/learning/base_convolution.cpp
+++ b/src/layers/learning/base_convolution.cpp
@@ -672,17 +672,17 @@ ::compute_gradients_dnn(bool using_transposed_convolution) {
 
       // Get workspace size
       auto multisync = El::MakeMultiSync(gpu::get_sync_info(workspace));
-      size_t workspace_size =
-        dnn_lib::get_bwd_weights_conv_workspace_size(gradient_wrt_output_desc,
-                                                     input_desc,
-                                                     m_convolution_dnn_desc,
-                                                     m_kernel_dnn_desc,
-                                                     multisync);
-      workspace.Resize(workspace_size / sizeof(TensorDataType), 1);
-      workspace_size = workspace.Height() * sizeof(TensorDataType);
 
       // Determine algorithm and compute kernel gradient
       if (using_transposed_convolution) {
+        size_t workspace_size =
+          dnn_lib::get_bwd_weights_conv_workspace_size(input_desc,
+                                                       gradient_wrt_output_desc,
+                                                       m_convolution_dnn_desc,
+                                                       m_kernel_dnn_desc,
+                                                       multisync);
+        workspace.Resize(workspace_size / sizeof(TensorDataType), 1);
+        workspace_size = workspace.Height() * sizeof(TensorDataType);
         bwd_filter_conv_alg kernel_gradient_dnn_algorithm
           = get_backward_filter_algo_dnn(
             local_input.Width(),
@@ -704,6 +704,14 @@ ::compute_gradients_dnn(bool using_transposed_convolution) {
           m_kernel_dnn_desc,
           kernel_gradient.Matrix());
       } else {
+        size_t workspace_size =
+          dnn_lib::get_bwd_weights_conv_workspace_size(gradient_wrt_output_desc,
+                                                       input_desc,
+                                                       m_convolution_dnn_desc,
+                                                       m_kernel_dnn_desc,
+                                                       multisync);
+        workspace.Resize(workspace_size / sizeof(TensorDataType), 1);
+        workspace_size = workspace.Height() * sizeof(TensorDataType);
         bwd_filter_conv_alg kernel_gradient_dnn_algorithm
           = get_backward_filter_algo_dnn(
             local_input.Width(),

From 4636f27a4b613a108c1cefbddd8381faa8b0d303 Mon Sep 17 00:00:00 2001
From: Sam Ade Jacobs <jacobs32@llnl.gov>
Date: Tue, 12 Oct 2021 10:44:20 -0700
Subject: [PATCH 17/37] PROBIESNet (#1981)

* PROBIESNet implementation

* Fixed a bug in the HDF5 data reader where it failed to check to make
sure that the data store was enabled on the command line.  Fixed a bug
where building the packing map would segfault if there were no samples
in the data store.  Fixed a bug where the fetch_data_field did not
check that the number of elements in the requested field matched the
provided Hydrogen matrix height.  Used a helper function for getting
the data store to ensure that there is a valid pointer.

* In the HDF5 data reader special data fetch functions need to use the
preprocessor macros to define the strings for samples, responses, and
labels.  Added accessor functions to allow unit tests to force the
pack function to leave existing data in place.

* Fixed PROBIES model and HDF5 unit tests to use the proper samples and
responses data fields rather than datum and response.  This makes the
behavior of data fields consistent across user defined and predefined
data fields.  Added unit tests for the explicit fetch_datum and
fetch_response function.

* Fixed data field name for responses

* Added tests to check that fetch_label fails

* Updated release notes

Co-authored-by: Brian C. Van Essen <vanessen1@llnl.gov>
---
 ReleaseNotes.txt                              |   1 +
 .../physics/HRRL/data/probies_v2.prototext    |  22 ++++
 .../physics/HRRL/data/probies_v3.prototext    |  22 ++++
 .../physics/HRRL/models/probiesNet.py         |  51 ++++++++
 applications/physics/HRRL/train_probiesNet.py | 113 ++++++++++++++++++
 .../lbann/data_readers/data_reader_HDF5.hpp   |  17 +--
 src/data_readers/data_reader_HDF5.cpp         |  40 ++++---
 .../data_reader_HDF5_hrrl_data_test.cpp       |  20 ++--
 .../data_reader_HDF5_hrrl_public_api.cpp      |  89 ++++++++++++++
 .../hdf5_hrrl_experiment_schema.yaml          |  12 +-
 .../hdf5_hrrl_test_data_and_schema.yaml       |   4 +-
 11 files changed, 354 insertions(+), 37 deletions(-)
 create mode 100644 applications/physics/HRRL/data/probies_v2.prototext
 create mode 100644 applications/physics/HRRL/data/probies_v3.prototext
 create mode 100644 applications/physics/HRRL/models/probiesNet.py
 create mode 100644 applications/physics/HRRL/train_probiesNet.py

diff --git a/ReleaseNotes.txt b/ReleaseNotes.txt
index 3fc55127ddb..261437521e2 100644
--- a/ReleaseNotes.txt
+++ b/ReleaseNotes.txt
@@ -30,6 +30,7 @@ Model portability & usability:
 
 Experiments & Applications:
  - Example for training Transformer model with D&SP and D&SP-cSub
+ - PROBIESNet model for HRRL data
 
 Internal features:
  - Added operator class
diff --git a/applications/physics/HRRL/data/probies_v2.prototext b/applications/physics/HRRL/data/probies_v2.prototext
new file mode 100644
index 00000000000..e3a6d872bd9
--- /dev/null
+++ b/applications/physics/HRRL/data/probies_v2.prototext
@@ -0,0 +1,22 @@
+data_reader {
+  reader {
+    name: "hdf5_data_reader"
+    role: "train"
+    sample_list: "/p/vast1/lbann/datasets/HRRL/pub/sample_list/expv1_train.sample_list"
+    validation_percent: 0.1
+    tournament_percent: 0.1
+    percent_of_data_to_use: 1.0
+    data_schema_filename: "/p/vast1/lbann/datasets/HRRL/pub/hrrl_data_schema.yaml"
+    experiment_schema_filename: "/p/vast1/lbann/datasets/HRRL/pub/hrrl_experiment_schema.yaml"
+    enable_responses: true
+  }
+  reader {
+    name: "hdf5_data_reader"
+    role: "test"
+    sample_list: "/p/vast1/lbann/datasets/HRRL/pub/sample_list/expv1_test.sample_list"
+    percent_of_data_to_use: 1.0
+    data_schema_filename: "/p/vast1/lbann/datasets/HRRL/pub/hrrl_data_schema.yaml"
+    experiment_schema_filename: "/p/vast1/lbann/datasets/HRRL/pub/hrrl_experiment_schema.yaml"
+    enable_responses: true
+  }
+}
diff --git a/applications/physics/HRRL/data/probies_v3.prototext b/applications/physics/HRRL/data/probies_v3.prototext
new file mode 100644
index 00000000000..fc598b2d8e6
--- /dev/null
+++ b/applications/physics/HRRL/data/probies_v3.prototext
@@ -0,0 +1,22 @@
+data_reader {
+  reader {
+    name: "hdf5_data_reader"
+    role: "train"
+    sample_list: "/p/vast1/lbann/datasets/HRRL/pub/sample_list/expv3_train.sample_list"
+    validation_percent: 0.1
+    tournament_percent: 0.1
+    percent_of_data_to_use: 1.0
+    data_schema_filename: "/p/vast1/lbann/datasets/HRRL/pub/hrrl_data_schema_h5_04Jun2021.yaml"
+    experiment_schema_filename: "/p/vast1/lbann/datasets/HRRL/pub/hrrl_experiment_schema.yaml"
+    enable_responses: true
+  }
+  reader {
+    name: "hdf5_data_reader"
+    role: "test"
+    sample_list: "/p/vast1/lbann/datasets/HRRL/pub/sample_list/expv3_test.sample_list"
+    percent_of_data_to_use: 1.0
+    data_schema_filename: "/p/vast1/lbann/datasets/HRRL/pub/hrrl_data_schema_h5_04Jun2021.yaml"
+    experiment_schema_filename: "/p/vast1/lbann/datasets/HRRL/pub/hrrl_experiment_schema.yaml"
+    enable_responses: true
+  }
+}
diff --git a/applications/physics/HRRL/models/probiesNet.py b/applications/physics/HRRL/models/probiesNet.py
new file mode 100644
index 00000000000..c9c00dba06c
--- /dev/null
+++ b/applications/physics/HRRL/models/probiesNet.py
@@ -0,0 +1,51 @@
+import lbann
+import lbann.modules
+
+class PROBIESNet(lbann.modules.Module):
+    """Two conv/max-pool stages followed by three fully-connected modules."""
+
+    global_count = 0  # Static counter, used for default names
+
+    def __init__(self, output_size, name=None):
+        """Initialize PROBIESNet.
+
+        Args:
+            output_size (int): Size of output tensor.
+            name (str, optional): Module name
+                (default: 'probiesNet_module<index>').
+
+        """
+        super().__init__()
+        PROBIESNet.global_count += 1
+        self.instance = 0  # Number of forward() calls, used in layer names
+        self.name = (name if name
+                     else 'probiesNet_module{0}'.format(PROBIESNet.global_count))
+        conv = lbann.modules.Convolution2dModule
+        fc = lbann.modules.FullyConnectedModule
+        self.conv1 = conv(36, 11, stride=4, activation=lbann.Relu,
+                          name=self.name+'_conv1')
+        self.conv2 = conv(64, 5, padding=2, activation=lbann.Relu,
+                          name=self.name+'_conv2')
+        self.fc1 = fc(480, activation=lbann.Relu, name=self.name+'_fc1')
+        self.fc2 = fc(240, activation=lbann.Relu, name=self.name+'_fc2')
+        self.fc3 = fc(output_size, name='pred')
+
+    def forward(self, x):
+        """Apply the network to `x`; returns the output of the last FC module."""
+        self.instance += 1
+
+        def layer_name(tag):
+            # Unique per module name and per forward() call.
+            return '{0}_{1}_instance{2}'.format(self.name, tag, self.instance)
+
+        def max_pool(y, tag):
+            return lbann.Pooling(y, num_dims=2, has_vectors=False,
+                                 pool_dims_i=2, pool_pads_i=0, pool_strides_i=2,
+                                 pool_mode='max', name=layer_name(tag))
+
+        x = max_pool(self.conv1(x), 'pool1')
+        x = max_pool(self.conv2(x), 'pool2')
+
+        x = lbann.Dropout(self.fc1(x), keep_prob=0.5, name=layer_name('drop6'))
+        x = lbann.Dropout(self.fc2(x), keep_prob=0.5, name=layer_name('drop7'))
+        return self.fc3(x)
diff --git a/applications/physics/HRRL/train_probiesNet.py b/applications/physics/HRRL/train_probiesNet.py
new file mode 100644
index 00000000000..31be3325795
--- /dev/null
+++ b/applications/physics/HRRL/train_probiesNet.py
@@ -0,0 +1,113 @@
+"""Construct and launch PROBIESNet training on HRRL PROBIES data."""
+
+from os.path import abspath, dirname, join
+import google.protobuf.text_format as txtf
+import models.probiesNet as probies_net
+import argparse
+import lbann
+import lbann.contrib.args
+import lbann.contrib.launcher
+
+# ==============================================
+# Setup and launch experiment
+# ==============================================
+
+# Command-line arguments
+desc = ('Construct and run ProbiesNet on HRRL PROBIES data. ')
+parser = argparse.ArgumentParser(description=desc)
+lbann.contrib.args.add_scheduler_arguments(parser)
+parser.add_argument(
+    '--job-name', action='store', default='probiesNet', type=str,
+    help='scheduler job name (default: probiesNet)')
+parser.add_argument(
+    '--mini-batch-size', action='store', default=32, type=int,
+    help='mini-batch size (default: 32)', metavar='NUM')
+parser.add_argument(
+    '--reader-prototext', action='store', default='probies_v2.prototext', type=str,
+    help='data to use (default: probies_v2.prototext, 20K data)')
+parser.add_argument(
+    '--num-epochs', action='store', default=100, type=int,
+    help='number of epochs (default: 100)', metavar='NUM')
+
+lbann.contrib.args.add_optimizer_arguments(parser)
+args = parser.parse_args()
+
+
+# Data reader prototext, resolved inside the data/ directory next to this script
+cur_dir = dirname(abspath(__file__))
+data_reader_prototext = join(cur_dir, 'data', args.reader_prototext)
+
+print("DATA READER ", data_reader_prototext)
+
+
+# Inputs: the 'samples' field is reshaped to dims '1 300 300'
+images = lbann.Input(data_field='samples')
+responses = lbann.Input(data_field='responses')
+
+num_labels = 5
+
+images = lbann.Reshape(images, dims='1 300 300')
+
+# Network prediction and mean-squared-error objective
+pred = probies_net.PROBIESNet(num_labels)(images)
+mse = lbann.MeanSquaredError([responses, pred])
+
+
+# Pearson Correlation
+# rho(x,y) = covariance(x,y) / sqrt( variance(x) * variance(y) )
+pearson_r_cov = lbann.Covariance([pred, responses],
+                                 name="pearson_r_cov")
+pearson_r_var1 = lbann.Variance(responses,
+                                name="pearson_r_var1")
+pearson_r_var2 = lbann.Variance(pred,
+                                name="pearson_r_var2")
+
+pearson_r_mult = lbann.Multiply([pearson_r_var1, pearson_r_var2],
+                                name="pearson_r_mult")
+pearson_r_sqrt = lbann.Sqrt(pearson_r_mult,
+                            name="pearson_r_sqrt")
+
+# eps is added to the denominator before dividing
+eps = lbann.Constant(value=1e-07,hint_layer=pearson_r_sqrt)
+pearson_r = lbann.Divide([pearson_r_cov, lbann.Add(pearson_r_sqrt,eps)],
+                         name="pearson_r")
+
+
+# Metrics and callbacks
+metrics = [lbann.Metric(mse, name='mse'),
+           lbann.Metric(pearson_r, name='pearson_r')]
+
+callbacks = [lbann.CallbackPrint(),
+             lbann.CallbackTimer()]
+
+
+# Model built from the layer graph traversed from `images` and `responses`
+layers = list(lbann.traverse_layer_graph([images, responses]))
+model = lbann.Model(args.num_epochs,
+                    layers=layers,
+                    metrics=metrics,
+                    objective_function=mse,
+                    callbacks=callbacks)
+
+
+# Load data reader from prototext
+data_reader_proto = lbann.lbann_pb2.LbannPB()
+with open(data_reader_prototext, 'r') as f:
+    txtf.Merge(f.read(), data_reader_proto)
+data_reader_proto = data_reader_proto.data_reader
+
+
+# Setup trainer
+trainer = lbann.Trainer(mini_batch_size=args.mini_batch_size)
+
+
+# Setup optimizer
+opt = lbann.Adam(learn_rate=0.0002,beta1=0.9,beta2=0.99,eps=1e-8)
+
+
+# Run experiment
+kwargs = lbann.contrib.args.get_scheduler_kwargs(args)
+lbann.contrib.launcher.run(trainer, model, data_reader_proto, opt,
+                           lbann_args=" --use_data_store --preload_data_store",
+                           job_name=args.job_name,
+                           **kwargs)
diff --git a/include/lbann/data_readers/data_reader_HDF5.hpp b/include/lbann/data_readers/data_reader_HDF5.hpp
index a3fff29d506..80fc2475e6d 100644
--- a/include/lbann/data_readers/data_reader_HDF5.hpp
+++ b/include/lbann/data_readers/data_reader_HDF5.hpp
@@ -69,17 +69,17 @@ class hdf5_data_reader : public data_reader_sample_list<sample_list_hdf5<std::st
 
   bool fetch_datum(CPUMat& X, int data_id, int mb_idx) override
   {
-    return fetch_data_field("datum", X, data_id, mb_idx);
+    return fetch_data_field(INPUT_DATA_TYPE_SAMPLES, X, data_id, mb_idx);
   }
 
   bool fetch_response(CPUMat& Y, int data_id, int mb_idx) override
   {
-    return fetch_data_field("response", Y, data_id, mb_idx);
+    return fetch_data_field(INPUT_DATA_TYPE_RESPONSES, Y, data_id, mb_idx);
   }
 
   bool fetch_label(CPUMat& Y, int data_id, int mb_idx) override
   {
-    return fetch_data_field("label", Y, data_id, mb_idx);
+    return fetch_data_field(INPUT_DATA_TYPE_LABELS, Y, data_id, mb_idx);
   }
 
   /** @brief Sets the name of the yaml experiment file */
@@ -105,22 +105,22 @@ class hdf5_data_reader : public data_reader_sample_list<sample_list_hdf5<std::st
 
   const std::vector<int> get_data_dims() const override
   {
-    return get_data_dims("datum");
+    return get_data_dims(INPUT_DATA_TYPE_SAMPLES);
   }
 
   int get_linearized_data_size() const override
   {
-    return get_linearized_size("datum");
+    return get_linearized_size(INPUT_DATA_TYPE_SAMPLES);
   }
 
   int get_linearized_response_size() const override
   {
-    return get_linearized_size("response");
+    return get_linearized_size(INPUT_DATA_TYPE_RESPONSES);
   }
 
   int get_linearized_label_size() const override
   {
-    return get_linearized_size("label");
+    return get_linearized_size(INPUT_DATA_TYPE_LABELS);
   }
 
   int get_num_labels() const override { return get_linearized_label_size(); }
@@ -337,6 +337,9 @@ class hdf5_data_reader : public data_reader_sample_list<sample_list_hdf5<std::st
   /** sanity check; call after adjust_metadata */
   void test_that_all_nodes_contain_metadata(conduit::Node& node);
 
+  /** @brief Accessors for the m_delete_packed_fields flag. */
+  bool get_delete_packed_fields() const { return m_delete_packed_fields; }
+  void set_delete_packed_fields(bool flag) { m_delete_packed_fields = flag; }
   //=========================================================================
   // template declarations follow
   //=========================================================================
diff --git a/src/data_readers/data_reader_HDF5.cpp b/src/data_readers/data_reader_HDF5.cpp
index 41742ebb953..01ad0de403c 100644
--- a/src/data_readers/data_reader_HDF5.cpp
+++ b/src/data_readers/data_reader_HDF5.cpp
@@ -224,6 +224,10 @@ void hdf5_data_reader::load()
   // with data store
   // TODO MRW
   // opts->set_option("preload_data_store", true);
+  if (!arg_parser.get<bool>(USE_DATA_STORE)) {
+    LBANN_ERROR("HDF5 data reader requires the data store. ",
+                "Set command line arguments --use_data_store --preload_data_store");
+  }
 
   // Load the sample list(s)
   data_reader_sample_list::load();
@@ -914,6 +918,11 @@ bool hdf5_data_reader::fetch_data_field(data_field_type data_field,
   std::string dtype;
   const void* d = get_data(data_id, data_field, n_elts, dtype);
 
+  if (static_cast<El::Int>(n_elts) != Y.Height()) {
+    LBANN_ERROR("data field ", data_field, " has ", n_elts,
+                " elements, but the matrix has a linearized size (height) of ",
+                Y.Height());
+  }
   if (dtype == "float64") {
     const conduit::float64* data = reinterpret_cast<const conduit::float64*>(d);
     for (size_t j = 0; j < n_elts; ++j) {
@@ -964,24 +973,25 @@ void hdf5_data_reader::print_metadata(std::ostream& os)
         "role: "
      << get_role() << std::endl;
 
+  std::unordered_map<std::string, conduit::Node*> leaves;
+  std::unordered_map<std::string, conduit::Node*> mp;
   // load a sample from file, applying all transformations along the way;
   // need to do this so we can get the correct dtypes
   conduit::Node populated_node;
-  size_t index = random() % m_shuffled_indices.size();
-  bool ignore_failure = true;
-  load_sample(populated_node, index, ignore_failure);
-
-  // get all leaves (data fields)
-  std::unordered_map<std::string, conduit::Node*> leaves;
-  get_leaves(&populated_node, leaves);
-
-  // build map: field_name -> Node
-  std::unordered_map<std::string, conduit::Node*> mp;
-  for (const auto& t : leaves) {
-    size_t j = t.first.find('/');
-    mp[t.first.substr(j + 1)] = t.second;
+  if(m_shuffled_indices.size() != 0) {
+    size_t index = random() % m_shuffled_indices.size();
+    bool ignore_failure = true;
+    load_sample(populated_node, index, ignore_failure);
+
+    // get all leaves (data fields)
+    get_leaves(&populated_node, leaves);
+
+    // build map: field_name -> Node
+    for (const auto& t : leaves) {
+      size_t j = t.first.find('/');
+      mp[t.first.substr(j + 1)] = t.second;
+    }
   }
-
   // print metadata and data types for all other nodes
   for (const auto& t : m_useme_node_map_ptrs) {
     const std::string& name = t.first;
@@ -1041,7 +1051,7 @@ const void* hdf5_data_reader::get_data(const size_t sample_id_in,
 {
 
   // get the pathname to the data, and verify it exists in the conduit::Node
-  const conduit::Node& node = m_data_store->get_conduit_node(sample_id_in);
+  const conduit::Node& node = get_data_store().get_conduit_node(sample_id_in);
   std::ostringstream ss;
   ss << node.child(0).name() + "/" << data_field;
   if (!node.has_path(ss.str())) {
diff --git a/src/data_readers/unit_test/data_reader_HDF5_hrrl_data_test.cpp b/src/data_readers/unit_test/data_reader_HDF5_hrrl_data_test.cpp
index 19a904cb6c1..7000c4b5329 100644
--- a/src/data_readers/unit_test/data_reader_HDF5_hrrl_data_test.cpp
+++ b/src/data_readers/unit_test/data_reader_HDF5_hrrl_data_test.cpp
@@ -43,7 +43,7 @@
 
 // Use a different schema to create a different packing
 const std::string packed_hdf5_hrrl_data_sample_id_foobar =R"FOO(000000334:
-    datum: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
+    samples: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
     foo: [15.2486634101312, 0.0426354341969429]
     bar: [64037572840.4818, 5.34505173275895]
     baz: [32.6826031770453]
@@ -51,7 +51,7 @@ const std::string packed_hdf5_hrrl_data_sample_id_foobar =R"FOO(000000334:
 
 // Now change the ordering fields in the experiment schema to change the field order
 const std::string packed_hdf5_hrrl_data_sample_id_foobar_permute =R"FOO(000000334:
-    datum: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
+    samples: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
     foo: [0.0426354341969429, 15.2486634101312]
     bar: [5.34505173275895, 64037572840.4818]
     baz: [32.6826031770453]
@@ -60,7 +60,7 @@ const std::string packed_hdf5_hrrl_data_sample_id_foobar_permute =R"FOO(00000033
 const std::string hdf5_hrrl_experiment_schema_test_foobar = R"AurthurDent(
 Image:
   metadata:
-    pack: "datum"
+    pack: "samples"
     coerce: "float"
 Epmax:
   metadata:
@@ -83,7 +83,7 @@ const std::string hdf5_hrrl_experiment_schema_test_foobar = R"AurthurDent(
 const std::string hdf5_hrrl_experiment_schema_test_foobar_permute = R"AurthurDent(
 Image:
   metadata:
-    pack: "datum"
+    pack: "samples"
     coerce: "float"
 Epmax:
   metadata:
@@ -148,6 +148,11 @@ class DataReaderHDF5WhiteboxTester
     x.set_experiment_schema(s);
   }
 
+  void print_metadata(lbann::hdf5_data_reader& x,
+                      std::ostream& os = std::cout) {
+    x.print_metadata(os);
+  }
+
 };
 
 TEST_CASE("hdf5 data reader transform tests",
@@ -264,6 +269,7 @@ TEST_CASE("hdf5 data reader pack test",
     // Instantiate a fresh copy of the sample
     conduit::Node test_node;
     test_node.parse(hdf5_hrrl_data_sample_id, "yaml");
+    //white_box_tester.print_metadata(*hdf5_dr);
     white_box_tester.pack(*hdf5_dr, test_node, index);
 
     // Get the reference packed node
@@ -271,7 +277,7 @@ TEST_CASE("hdf5 data reader pack test",
     ref_node.parse(packed_hdf5_hrrl_data_sample_id, "yaml");
 
     // Check each of the fields to ensure that the packing worked
-    std::vector<std::string>fields = {"datum", "response"};
+    std::vector<std::string>fields = {"samples", "responses"};
     for (auto f : fields) {
       const std::string ref_pathname("000000334/" + f);
       size_t ref_num_elements = ref_node[ref_pathname].dtype().number_of_elements();
@@ -304,7 +310,7 @@ TEST_CASE("hdf5 data reader pack test",
     ref_node.parse(packed_hdf5_hrrl_data_sample_id_foobar, "yaml");
 
     // Check each of the fields to ensure that the packing worked
-    std::vector<std::string>fields = {"datum", "foo", "bar", "baz"};
+    std::vector<std::string>fields = {"samples", "foo", "bar", "baz"};
     for (auto f : fields) {
       const std::string ref_pathname("000000334/" + f);
       size_t ref_num_elements = ref_node[ref_pathname].dtype().number_of_elements();
@@ -338,7 +344,7 @@ TEST_CASE("hdf5 data reader pack test",
     ref_node.parse(packed_hdf5_hrrl_data_sample_id_foobar_permute, "yaml");
 
     // Check each of the fields to ensure that the packing worked
-    std::vector<std::string>fields = {"datum", "foo", "bar", "baz"};
+    std::vector<std::string>fields = {"samples", "foo", "bar", "baz"};
     for (auto f : fields) {
       const std::string ref_pathname("000000334/" + f);
       size_t ref_num_elements = ref_node[ref_pathname].dtype().number_of_elements();
diff --git a/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp b/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp
index abe7095ae5a..c289d9f0248 100644
--- a/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp
+++ b/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp
@@ -37,6 +37,7 @@
 #include <conduit/conduit.hpp>
 #include <cstdlib>
 #include <errno.h>
+#include <ostream>
 #include <string.h>
 
 #include "lbann/data_readers/data_reader_HDF5.hpp"
@@ -84,6 +85,16 @@ class DataReaderHDF5WhiteboxTester
     x.set_experiment_schema(s);
   }
 
+  void print_metadata(lbann::hdf5_data_reader& x,
+                      std::ostream& os = std::cout) {
+    x.print_metadata(os);
+  }
+
+  void set_delete_packed_fields(lbann::hdf5_data_reader& x,
+                                const bool flag) {
+    x.set_delete_packed_fields(flag);
+  }
+
   bool fetch_data_field(lbann::hdf5_data_reader& dr,
                         lbann::data_field_type data_field,
                         lbann::CPUMat& X,
@@ -93,6 +104,30 @@ class DataReaderHDF5WhiteboxTester
     return dr.fetch_data_field(data_field, X, data_id, mb_idx);
   }
 
+  bool fetch_datum(lbann::hdf5_data_reader& dr,
+                   lbann::CPUMat& X,
+                   int data_id,
+                   int mb_idx)
+  {
+    return dr.fetch_datum(X, data_id, mb_idx);
+  }
+
+  bool fetch_response(lbann::hdf5_data_reader& dr,
+                      lbann::CPUMat& X,
+                      int data_id,
+                      int mb_idx)
+  {
+    return dr.fetch_response(X, data_id, mb_idx);
+  }
+
+  bool fetch_label(lbann::hdf5_data_reader& dr,
+                   lbann::CPUMat& X,
+                   int data_id,
+                   int mb_idx)
+  {
+    return dr.fetch_label(X, data_id, mb_idx);
+  }
+
   int get_linearized_size(lbann::hdf5_data_reader& dr,
                           lbann::data_field_type const& data_field)
   {
@@ -141,6 +176,13 @@ TEST_CASE("hdf5 data reader data field fetch tests",
   auto& ds = hdf5_dr->get_data_store();
   conduit::Node& ds_node = ds.get_empty_node(index);
   ds_node.parse(hdf5_hrrl_data_sample_id, "yaml");
+
+  // Once the node is constructed pack the requested fields into the node
+  size_t sample_index = 334;
+  white_box_tester.set_delete_packed_fields(*hdf5_dr, false);
+  white_box_tester.pack(*hdf5_dr, ds_node, sample_index);
+  white_box_tester.construct_linearized_size_lookup_tables(*hdf5_dr, ds_node);
+
   ds.set_preloaded_conduit_node(index, ds_node);
 
   // Initalize a per-trainer I/O thread pool
@@ -179,6 +221,53 @@ TEST_CASE("hdf5 data reader data field fetch tests",
     }
   }
 
+  SECTION("fetch datum and responses")
+  {
+    lbann::CPUMat X;
+
+    // Get the reference packed node
+    conduit::Node packed_ref_node;
+    packed_ref_node.parse(packed_hdf5_hrrl_data_sample_id, "yaml");
+
+    std::vector<std::string> fields = {};
+    fields.emplace_back(
+      GENERATE(std::string("samples"), std::string("responses")));
+    for (auto& data_field : fields) {
+      X.Resize(white_box_tester.get_linearized_size(*hdf5_dr, data_field), num_samples);
+
+      // Only the fetch that matches data_field is expected to succeed;
+      // the fetches for the other field types are expected to throw.
+      auto io_rng = lbann::set_io_generators_local_index(0);
+      for (auto j = 0; j < num_samples; j++) {
+        if (data_field == INPUT_DATA_TYPE_SAMPLES) {
+          white_box_tester.fetch_datum(*hdf5_dr, X, 0, j);
+          CHECK_THROWS(white_box_tester.fetch_label(*hdf5_dr, X, 0, j));
+          CHECK_THROWS(white_box_tester.fetch_response(*hdf5_dr, X, 0, j));
+        } else if (data_field == INPUT_DATA_TYPE_RESPONSES) {
+          CHECK_THROWS(white_box_tester.fetch_datum(*hdf5_dr, X, 0, j));
+          CHECK_THROWS(white_box_tester.fetch_label(*hdf5_dr, X, 0, j));
+          white_box_tester.fetch_response(*hdf5_dr, X, 0, j);
+        }
+      }
+
+      const std::string test_pathname("000000334/" + data_field);
+      for (El::Int j = 0; j < num_samples; j++) {
+        // Check that every element fetched into column j matches the packed reference
+        size_t num_elements = packed_ref_node[test_pathname].dtype().number_of_elements();
+        if(num_elements > 1) {
+          for(size_t i = 0; i < num_elements; i++) {
+            double check = packed_ref_node[test_pathname].as_double_array()[i];
+            CHECK(X(i,j) == Approx(check));
+          }
+        }
+        else {
+          double check = packed_ref_node[test_pathname].as_double();
+          CHECK(X(0,j) == Approx(check));
+        }
+      }
+    }
+  }
+
   SECTION("fetch invalid data field")
   {
     lbann::CPUMat X;
diff --git a/src/data_readers/unit_test/test_data/hdf5_hrrl_experiment_schema.yaml b/src/data_readers/unit_test/test_data/hdf5_hrrl_experiment_schema.yaml
index 3f38286f3fd..571984db3fd 100644
--- a/src/data_readers/unit_test/test_data/hdf5_hrrl_experiment_schema.yaml
+++ b/src/data_readers/unit_test/test_data/hdf5_hrrl_experiment_schema.yaml
@@ -1,21 +1,21 @@
 const std::string hdf5_hrrl_experiment_schema = R"AurthurDent(
 Image:
   metadata:
-    pack: "datum"
+    pack: "samples"
     coerce: "float"
 Epmax:
   metadata:
-    pack: "response"
+    pack: "responses"
 Etot:
   metadata:
-    pack: "response"
+    pack: "responses"
 N:
   metadata:
-    pack: "response"
+    pack: "responses"
 T:
   metadata:
-    pack: "response"
+    pack: "responses"
 alpha:
   metadata:
-    pack: "response"
+    pack: "responses"
 )AurthurDent";
diff --git a/src/data_readers/unit_test/test_data/hdf5_hrrl_test_data_and_schema.yaml b/src/data_readers/unit_test/test_data/hdf5_hrrl_test_data_and_schema.yaml
index 7a0967b3f92..7db2f785a98 100644
--- a/src/data_readers/unit_test/test_data/hdf5_hrrl_test_data_and_schema.yaml
+++ b/src/data_readers/unit_test/test_data/hdf5_hrrl_test_data_and_schema.yaml
@@ -22,8 +22,8 @@ const std::string hdf5_hrrl_data_sample_id =R"FOO(000000334:
 
 // Here is how the HRRL data expects its sample to be packed for this experiment schema
 const std::string packed_hdf5_hrrl_data_sample_id =R"FOO(000000334:
-    datum: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
-    response: [15.2486634101312, 0.0426354341969429, 64037572840.4818, 5.34505173275895, 32.6826031770453]
+    samples: [456.288777930614, 231.340700217946, 113.528447010204, 115.115911382861, 116.716861149023, 118.331222098325, 120.52874207647, 122.175220756304, 123.834871115725, 125.507597035081, 126.011234474661, 123.587537036166]
+    responses: [15.2486634101312, 0.0426354341969429, 64037572840.4818, 5.34505173275895, 32.6826031770453]
 )FOO";
 
 const std::string hdf5_hrrl_data_schema_test = R"AurthurDent(

From 3fb3846351985caf3c6284137847a13bb7fd0fd3 Mon Sep 17 00:00:00 2001
From: Brian Van Essen <vanessen1@llnl.gov>
Date: Wed, 13 Oct 2021 10:17:18 -0700
Subject: [PATCH 18/37] Updated the concretize call to have the test flag it is
 provided so (#1973)

that the hashes match.  Also add support for copying the
compile_commands.json out of the build to the root directory.
---
 scripts/build_lbann.sh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/scripts/build_lbann.sh b/scripts/build_lbann.sh
index 1a6db70a3a6..fef6acd28e9 100755
--- a/scripts/build_lbann.sh
+++ b/scripts/build_lbann.sh
@@ -669,7 +669,7 @@ if [[ "${SPEC_ONLY}" == "TRUE" ]]; then
 fi
 
 # Try to concretize the environment and catch the return code
-CMD="spack concretize"
+CMD="spack concretize ${INSTALL_BUILD_EXTRAS}"
 echo ${CMD} | tee -a ${LOG}
 [[ -z "${DRY_RUN:-}" ]] && { ${CMD} || exit_on_failure "${CMD}"; }
 
@@ -783,6 +783,13 @@ if [[ -z "${USER_BUILD:-}" ]]; then
     CMD="ln -s ${LBANN_HOME}/spack-build-${LBANN_SPEC_HASH} ${LINK_DIR}"
     echo ${CMD} | tee -a ${LOG}
     [[ -z "${DRY_RUN:-}" ]] && { ${CMD} || exit_on_failure "${CMD}"; }
+
+    # Copy the compile_commands.json file to LBANN_HOME
+    if [[ -e "${LBANN_HOME}/spack-build-${LBANN_SPEC_HASH}/compile_commands.json" ]]; then
+        CMD="cp ${LBANN_HOME}/spack-build-${LBANN_SPEC_HASH}/compile_commands.json ${LBANN_HOME}/compile_commands.json"
+        echo ${CMD} | tee -a ${LOG}
+        [[ -z "${DRY_RUN:-}" ]] && { ${CMD} || exit_on_failure "${CMD}"; }
+    fi
 fi
 
 ##########################################################################################

From 10e00a38badd414b9104b774c1cfd2fd869b57b3 Mon Sep 17 00:00:00 2001
From: Katie Graham <50850420+graham63@users.noreply.github.com>
Date: Thu, 14 Oct 2021 08:12:48 -0700
Subject: [PATCH 19/37] Add onnx callback (#1861)

Added the ability to export a trained model into an ONNX format.
Currently this has support for a limited number of layers.

* added export_onnx callback

* export_onnx.hpp/.cpp

* Added callback to CMakelists

* latest version of onnx callback

* latest version of onnx callback

* resolved merge conflict

* export_onnx.hpp/.cpp

* Added callback to CMakelists

* latest version of onnx callback

* latest version of onnx callback

* Added fill_onnx_node functionality to some layers

* latest version of onnx stuff

* working onnx export (sort of) with no weights

* added LBANN_HAS_ONNX option, cleaned up FIXMEs

* fixed issue with LBANN_HAS_ONNX option

* added export_onnx callback

export_onnx.hpp/.cpp

Added callback to CMakelists

latest version of onnx callback

latest version of onnx callback

resolved merge conflict

export_onnx.hpp/.cpp

Added fill_onnx_node functionality to some layers

latest version of onnx stuff

working onnx export (sort of) with no weights

added LBANN_HAS_ONNX option, cleaned up FIXMEs

fixed issue with LBANN_HAS_ONNX option

* Fixup callbacks.proto

* Fixup callbacks.proto again

* removed cout statements from export onnx callback

* fixed CB number in callbacks.proto

* addressed comments for PR

* removed debugging code from CMakeLists

* minor fixes to copyright and include files

* Added documentation to fill_onnx_node() and get_onnx_op_type()

* Modified cmake to exclude export_onnx

* resolved merge conflict

* export_onnx.hpp/.cpp

* Added callback to CMakelists

* latest version of onnx callback

* latest version of onnx callback

* added export_onnx callback

* export_onnx.hpp/.cpp

* Added callback to CMakelists

* latest version of onnx callback

* latest version of onnx callback

* Added fill_onnx_node functionality to some layers

* latest version of onnx stuff

* working onnx export (sort of) with no weights

* added LBANN_HAS_ONNX option, cleaned up FIXMEs

* fixed issue with LBANN_HAS_ONNX option

* removed cout statements from export onnx callback

* added export_onnx callback

export_onnx.hpp/.cpp

Added callback to CMakelists

latest version of onnx callback

latest version of onnx callback

resolved merge conflict

export_onnx.hpp/.cpp

Added fill_onnx_node functionality to some layers

latest version of onnx stuff

working onnx export (sort of) with no weights

added LBANN_HAS_ONNX option, cleaned up FIXMEs

fixed issue with LBANN_HAS_ONNX option

* Fixup callbacks.proto again

* fixed CB number in callbacks.proto

* addressed comments for PR

* removed debugging code from CMakeLists

* minor fixes to copyright and include files

* Modified cmake to exclude export_onnx

* resolved merge conflict

* export_onnx.hpp/.cpp

* Added callback to CMakelists

* latest version of onnx callback

* latest version of onnx callback

* added export_onnx callback

* export_onnx.hpp/.cpp

* Added callback to CMakelists

* latest version of onnx callback

* latest version of onnx callback

* Added fill_onnx_node functionality to some layers

* latest version of onnx stuff

* working onnx export (sort of) with no weights

* added LBANN_HAS_ONNX option, cleaned up FIXMEs

* fixed issue with LBANN_HAS_ONNX option

* removed cout statements from export onnx callback

* added export_onnx callback

export_onnx.hpp/.cpp

Added callback to CMakelists

latest version of onnx callback

latest version of onnx callback

resolved merge conflict

export_onnx.hpp/.cpp

Added fill_onnx_node functionality to some layers

latest version of onnx stuff

working onnx export (sort of) with no weights

added LBANN_HAS_ONNX option, cleaned up FIXMEs

fixed issue with LBANN_HAS_ONNX option

* Fixup callbacks.proto again

* fixed CB number in callbacks.proto

* addressed comments for PR

* removed debugging code from CMakeLists

* minor fixes to copyright and include files

* Added documentation to fill_onnx_node() and get_onnx_op_type()

* Modified cmake to exclude export_onnx

* resolved issue introduced by rebase error

* Update src/callbacks/unit_test/export_onnx_test.cpp

Co-authored-by: Tom Benson <30674819+benson31@users.noreply.github.com>

* move fill_onnx_node definitions out-of-line.

* fix copy-paste error

Co-authored-by: Brian C. Van Essen <vanessen1@llnl.gov>
Co-authored-by: Tom Benson <30674819+benson31@users.noreply.github.com>
Co-authored-by: Thomas R. Benson <benson31@llnl.gov>
---
 CMakeLists.txt                                |  17 ++-
 cmake/configure_files/lbann_config.hpp.in     |   1 +
 include/lbann/callbacks/CMakeLists.txt        |   3 +
 include/lbann/callbacks/export_onnx.hpp       |  94 ++++++++++++++++
 include/lbann/layers/activations/identity.hpp |   4 +
 include/lbann/layers/activations/relu.hpp     |   4 +
 include/lbann/layers/io/input_layer.hpp       |   5 +
 include/lbann/layers/layer.hpp                |  25 ++++-
 .../lbann/layers/loss/mean_squared_error.hpp  |  43 +++++++-
 include/lbann/layers/transform/dummy.hpp      |   5 +
 include/lbann/layers/transform/evaluation.hpp |  36 +++++-
 include/lbann/layers/transform/split.hpp      |  23 +++-
 src/callbacks/CMakeLists.txt                  |   4 +
 src/callbacks/export_onnx.cpp                 | 104 ++++++++++++++++++
 src/callbacks/unit_test/CMakeLists.txt        |   4 +
 src/callbacks/unit_test/export_onnx_test.cpp  |  57 ++++++++++
 src/layers/io/input_layer.cpp                 |  24 ++++
 src/layers/layer.cpp                          |  25 ++++-
 src/proto/callbacks.proto                     |   7 ++
 src/proto/factories/callback_factory.cpp      |   7 ++
 src/utils/unit_test/CMakeLists.txt            |   1 -
 21 files changed, 483 insertions(+), 10 deletions(-)
 create mode 100644 include/lbann/callbacks/export_onnx.hpp
 create mode 100644 src/callbacks/export_onnx.cpp
 create mode 100644 src/callbacks/unit_test/export_onnx_test.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0487f22bf85..d1eab94d8d8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -146,6 +146,9 @@ option(LBANN_WITH_UNIT_TESTING
 option(LBANN_WITH_ADDRESS_SANITIZER
   "Try clang-style use of ASAN (-fsanitize=address)" OFF)
 
+option(LBANN_WITH_ONNX
+  "Enable exporting onnx model." OFF)
+
 # Use deterministic GPU algorithms and layer operations
 option(LBANN_DETERMINISTIC
   "Use deterministic algorithms as much as possible." OFF)
@@ -593,6 +596,13 @@ endif (LBANN_WITH_UNIT_TESTING)
 # Handle the documentation
 add_subdirectory(docs)
 
+# Include onnx dependency
+if (LBANN_WITH_ONNX)
+  set (LBANN_HAS_ONNX TRUE)
+  find_package(ONNX CONFIG REQUIRED)
+endif ()
+
+
 ################################################################
 # Build LBANN
 ################################################################
@@ -624,6 +634,9 @@ target_include_directories(lbann PUBLIC
 target_compile_features(lbann PUBLIC cxx_std_17)
 
 # Use the IMPORTED targets when possible.
+if (LBANN_HAS_ONNX)
+  target_link_libraries(lbann PUBLIC onnx)
+endif ()
 target_link_libraries(lbann PUBLIC LbannProto)
 if (LBANN_HAS_TBINF)
   target_link_libraries(lbann PUBLIC TBinf)
@@ -864,12 +877,9 @@ if (LBANN_HAS_PYTHON_FRONTEND)
   set(_PY_INSTALL_MSG
     "
 \n**********************************************************************
-
 A Python package has been installed to ${_PY_INSTALL_DIR}. To use
 this package, be sure to add this directory to your PYTHONPATH, e.g.:
-
   export PYTHONPATH=${_PY_INSTALL_DIR}:\\$\{PYTHONPATH\}
-
 **********************************************************************\n
 ")
   install(CODE
@@ -962,6 +972,7 @@ append_str_tf(_str
   LBANN_HAS_TBINF
   LBANN_HAS_VTUNE
   LBANN_HAS_BOOST
+  LBANN_HAS_ONNX
   LBANN_NVPROF
   )
 string(APPEND _str
diff --git a/cmake/configure_files/lbann_config.hpp.in b/cmake/configure_files/lbann_config.hpp.in
index 1449c3c4975..bf138700c65 100644
--- a/cmake/configure_files/lbann_config.hpp.in
+++ b/cmake/configure_files/lbann_config.hpp.in
@@ -41,6 +41,7 @@
 #cmakedefine LBANN_HAS_EMBEDDED_PYTHON
 #cmakedefine LBANN_HAS_SHMEM
 #cmakedefine LBANN_HAS_LARGESCALE_NODE2VEC
+#cmakedefine LBANN_HAS_ONNX
 
 #cmakedefine LBANN_DETERMINISTIC
 
diff --git a/include/lbann/callbacks/CMakeLists.txt b/include/lbann/callbacks/CMakeLists.txt
index c31322a3eba..0d60cc2b4e1 100644
--- a/include/lbann/callbacks/CMakeLists.txt
+++ b/include/lbann/callbacks/CMakeLists.txt
@@ -45,5 +45,8 @@ set_full_path(THIS_DIR_HEADERS
   variable_minibatch.hpp
   )
 
+if(LBANN_HAS_ONNX)
+  list(APPEND THIS_DIR_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/export_onnx.hpp)
+endif ()
 # Propagate the files up the tree
 set(HEADERS "${HEADERS}" "${THIS_DIR_HEADERS}" PARENT_SCOPE)
diff --git a/include/lbann/callbacks/export_onnx.hpp b/include/lbann/callbacks/export_onnx.hpp
new file mode 100644
index 00000000000..def8d1bd530
--- /dev/null
+++ b/include/lbann/callbacks/export_onnx.hpp
@@ -0,0 +1,94 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+//
+// export_onnx .hpp .cpp - Exports trained model to onnx format
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef LBANN_CALLBACKS_EXPORT_ONNX_HPP_INCLUDED
+#define LBANN_CALLBACKS_EXPORT_ONNX_HPP_INCLUDED
+
+#include "lbann/callbacks/callback.hpp"
+#include <lbann/base.hpp>
+
+#include <onnx/onnx_pb.h>
+
+#include <google/protobuf/message.h>
+
+#include <iostream>
+#include <memory>
+#include <vector>
+
+namespace lbann {
+namespace callback {
+
+/** @class export_onnx
+ *  @brief Callback to export a trained model to ONNX format
+ */
+class export_onnx : public callback_base {
+
+public:
+  /** @brief export_onnx Constructor.
+   *  @param print_debug_string Option to print debug string to stdout
+   *  @param output_file Output filename (default = lbann_output.onnx)
+   */
+  export_onnx(bool print_debug_string = false,
+              std::string output_file = "lbann_output.onnx");
+
+  /** @brief Copy interface */
+  export_onnx* copy() const override {
+    return new export_onnx(*this);
+  }
+
+  /** @brief Return name of callback */
+  std::string name() const override { return "export_onnx"; }
+
+  /** @brief Gather model info */
+  void on_setup_end(model* m) override;
+
+  /** @brief Gather graph/layer info */
+  void on_train_begin(model* m) override;
+
+private:
+
+  /** @brief Option to print ONNX debug string */
+  bool m_print_debug_string;
+
+  /** @brief Name of output file. Default = lbann_output.onnx */
+  std::string m_output_file;
+
+  /** @brief ONNX ModelProto object */
+  onnx::ModelProto mp_;
+
+}; // class export_onnx
+
+std::unique_ptr<callback_base>
+build_export_onnx_callback_from_pbuf(
+  const google::protobuf::Message& proto_msg,
+  const std::shared_ptr<lbann_summary>&);
+
+} // namespace callback
+} // namespace lbann
+
+#endif  // LBANN_CALLBACKS_EXPORT_ONNX_HPP_INCLUDED
diff --git a/include/lbann/layers/activations/identity.hpp b/include/lbann/layers/activations/identity.hpp
index 2e79c74816a..bb1669ac8e1 100644
--- a/include/lbann/layers/activations/identity.hpp
+++ b/include/lbann/layers/activations/identity.hpp
@@ -61,6 +61,10 @@ class identity_layer : public data_type_layer<TensorDataType> {
   data_layout get_data_layout() const override { return Layout; }
   El::Device get_device_allocation() const override { return Device; }
 
+#ifdef LBANN_HAS_ONNX
+  std::string get_onnx_op_type() const override { return "Identity"; }
+#endif // LBANN_HAS_ONNX
+
   /** @name Serialization */
   ///@{
 
diff --git a/include/lbann/layers/activations/relu.hpp b/include/lbann/layers/activations/relu.hpp
index 4c93db4edd2..302cda77ea4 100644
--- a/include/lbann/layers/activations/relu.hpp
+++ b/include/lbann/layers/activations/relu.hpp
@@ -59,6 +59,10 @@ class relu_layer : public data_type_layer<TensorDataType> {
   data_layout get_data_layout() const override { return T_layout; }
   El::Device get_device_allocation() const override { return Dev; }
 
+#ifdef LBANN_HAS_ONNX
+  std::string get_onnx_op_type() const override { return "Relu"; }
+#endif // LBANN_HAS_ONNX
+
   /** @name Serialization */
   ///@{
 
diff --git a/include/lbann/layers/io/input_layer.hpp b/include/lbann/layers/io/input_layer.hpp
index 0571769323e..8aa75b658b1 100644
--- a/include/lbann/layers/io/input_layer.hpp
+++ b/include/lbann/layers/io/input_layer.hpp
@@ -122,6 +122,11 @@ class input_layer : public data_type_layer<TensorDataType> {
   }
 
   std::string get_type() const override { return "input"; }
+
+#ifdef LBANN_HAS_ONNX
+  void fill_onnx_node(onnx::GraphProto& graph) const override;
+#endif // LBANN_HAS_ONNX
+
   // description get_description() const override {
   //   auto desc = io_layer<TensorDataType>::get_description();
   //   return desc;
diff --git a/include/lbann/layers/layer.hpp b/include/lbann/layers/layer.hpp
index 296a7291d15..50286590229 100644
--- a/include/lbann/layers/layer.hpp
+++ b/include/lbann/layers/layer.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -45,6 +45,9 @@
 #endif // LBANN_HAS_DISTCONV
 #include <string>
 #include <vector>
+#ifdef LBANN_HAS_ONNX
+#include <onnx/onnx_pb.h>
+#endif
 
 /** @brief A utility macro for easily defining default-constructed sub-class
  *  builders.*/
@@ -470,6 +473,26 @@ class Layer {
   /** @brief Write layer to proto file */
   virtual void write_proto(lbann_data::Layer* proto) const;
 
+#ifdef LBANN_HAS_ONNX
+  /** @brief Add layer specific data to onnx graph
+   *  Fills layer specific data in onnx nodes. Needs to
+   *  be overridden by layers that cannot be represented
+   *  by a single onnx operator type.
+   */
+  virtual void fill_onnx_node(onnx::GraphProto& graph) const;
+
+private:
+  /** @brief Get ONNX operator type
+   *  Unsupported layers and layers that cannot be represented
+   *  by a single ONNX operator type will throw an LBANN error.
+   *  The operator types for these layers must be included
+   *  manually in the overridden fill_onnx_node() function.
+   */
+  virtual std::string get_onnx_op_type() const;
+#endif // LBANN_HAS_ONNX
+
+public:
+
   const Layer& get_parent_layer(size_t index=0) const;
   const Layer& get_child_layer(size_t index=0) const;
 
diff --git a/include/lbann/layers/loss/mean_squared_error.hpp b/include/lbann/layers/loss/mean_squared_error.hpp
index f576d26a911..ae4424953cc 100644
--- a/include/lbann/layers/loss/mean_squared_error.hpp
+++ b/include/lbann/layers/loss/mean_squared_error.hpp
@@ -103,6 +103,10 @@ class mean_squared_error_layer : public data_type_layer<TensorDataType> {
   data_layout get_data_layout() const override { return T_layout; }
   El::Device get_device_allocation() const override { return Dev; }
 
+#ifdef LBANN_HAS_ONNX
+  void fill_onnx_node(onnx::GraphProto& graph) const override;
+#endif // LBANN_HAS_ONNX
+
   void setup_dims(DataReaderMetaData& dr_metadata) override {
     data_type_layer<TensorDataType>::setup_dims(dr_metadata);
     this->set_output_dims({1});
@@ -256,9 +260,46 @@ class mean_squared_error_layer : public data_type_layer<TensorDataType> {
                                                           this->get_distconv_adapter().get_error_signals(1));
   }
 #endif // LBANN_HAS_DISTCONV
-
 };
 
+#ifdef LBANN_HAS_ONNX
+template <typename T, data_layout L, El::Device D>
+void mean_squared_error_layer<T, L, D>::fill_onnx_node(
+  onnx::GraphProto& graph) const
+{
+  auto* diff = graph.add_node(); // Node 1/3: Sub over the parent outputs
+  for (auto const* parent : this->get_parent_layers()) {
+    size_t idx = parent->find_child_layer_index(*this);
+    diff->add_input(parent->get_name() + "_" + std::to_string(idx));
+  }
+  diff->add_output(this->get_name() + "_diff_0");
+  diff->set_name(this->get_name() + "_diff");
+  diff->set_op_type("Sub");
+  diff->set_domain("");
+  diff->set_doc_string("First node representing Mean Squared Error Layer");
+  // Node 2/3: square the difference (layer-name prefix keeps names unique).
+  auto* square = graph.add_node();
+  square->add_input(diff->output(0));
+  square->add_input(diff->output(0));
+  square->add_output(this->get_name() + "_square_0");
+  square->set_name(this->get_name() + "_square");
+  square->set_op_type("Mul");
+  square->set_domain("");
+  square->set_doc_string("Second node representing Mean Squared Error Layer");
+  // Node 3/3: emits the layer outputs under the usual "<name>_<idx>" names.
+  auto* mse = graph.add_node();
+  mse->add_input(square->output(0));
+  for (auto const* child : this->get_child_layers()) {
+    size_t idx = this->find_child_layer_index(*child);
+    mse->add_output(this->get_name() + "_" + std::to_string(idx));
+  }
+  mse->set_name(this->get_name() + "_mse");
+  mse->set_op_type("Mean"); // FIXME(review): elementwise op; ReduceMean?
+  mse->set_domain("");
+  mse->set_doc_string("Third node representing Mean Squared Error Layer");
+}
+#endif // LBANN_HAS_ONNX
+
 #ifdef LBANN_HAS_DISTCONV
 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
 const mean_squared_error_distconv_adapter<TensorDataType, T_layout, Dev>&
diff --git a/include/lbann/layers/transform/dummy.hpp b/include/lbann/layers/transform/dummy.hpp
index 551a31a82c4..70d3d5ae02d 100644
--- a/include/lbann/layers/transform/dummy.hpp
+++ b/include/lbann/layers/transform/dummy.hpp
@@ -57,6 +57,11 @@ class dummy_layer : public data_type_layer<TensorDataType> {
   std::string get_type() const override { return "dummy"; }
   data_layout get_data_layout() const override { return T_layout; }
   El::Device get_device_allocation() const override { return Dev; }
+
+#ifdef LBANN_HAS_ONNX
+  void fill_onnx_node(onnx::GraphProto& graph) const override {}
+#endif // LBANN_HAS_ONNX
+
 protected:
 
   friend class cereal::access;
diff --git a/include/lbann/layers/transform/evaluation.hpp b/include/lbann/layers/transform/evaluation.hpp
index e3476cfa070..9d371f0c926 100644
--- a/include/lbann/layers/transform/evaluation.hpp
+++ b/include/lbann/layers/transform/evaluation.hpp
@@ -119,15 +119,47 @@ class evaluation_layer : public abstract_evaluation_layer<TensorDataType> {
   data_layout get_data_layout() const override { return T_layout; }
   El::Device get_device_allocation() const override { return Dev; }
 
+#ifdef LBANN_HAS_ONNX
+  void fill_onnx_node(onnx::GraphProto& graph) const override;
+#endif // LBANN_HAS_ONNX
+
 protected:
   friend class cereal::access;
   evaluation_layer()
     : evaluation_layer(nullptr)
   {}
-
-
 };
 
+#ifdef LBANN_HAS_ONNX
+template <typename T, data_layout L, El::Device D>
+void evaluation_layer<T, L, D>::fill_onnx_node(onnx::GraphProto& graph) const
+{
+  // Pass the parent's tensor through an Identity node named after this layer.
+  auto* node = graph.add_node();
+  for (auto const* parent : this->get_parent_layers()) {
+    size_t idx = parent->find_child_layer_index(*this);
+    node->add_input(parent->get_name() + "_" + std::to_string(idx));
+  }
+  node->add_output(this->get_name());
+  node->set_name(this->get_name());
+  node->set_op_type("Identity");
+  node->set_domain("");
+  node->set_doc_string(this->get_type());
+
+  // Expose that tensor as a graph output.
+  auto* graph_output = graph.add_output();
+  graph_output->set_name(this->get_name());
+  auto* tensor_type = graph_output->mutable_type()->mutable_tensor_type();
+  // FIXME: element type is hard-coded to float instead of following T.
+  tensor_type->set_elem_type(onnx::TensorProto::FLOAT);
+
+  // Declared shape: a symbolic "batch" dimension followed by a fixed 1.
+  auto* shape = tensor_type->mutable_shape();
+  shape->add_dim()->set_dim_param("batch");
+  shape->add_dim()->set_dim_value(1);
+}
+#endif // LBANN_HAS_ONNX
+
 LBANN_DEFINE_LAYER_BUILDER(evaluation);
 
 #ifndef LBANN_EVALUATION_LAYER_INSTANTIATE
diff --git a/include/lbann/layers/transform/split.hpp b/include/lbann/layers/transform/split.hpp
index 2a8271f1ddb..f00736ecc31 100644
--- a/include/lbann/layers/transform/split.hpp
+++ b/include/lbann/layers/transform/split.hpp
@@ -72,7 +72,9 @@ class split_layer : public data_type_layer<TensorDataType> {
   data_layout get_data_layout() const override { return T_layout; }
   El::Device get_device_allocation() const override { return Dev; }
 
-
+#ifdef LBANN_HAS_ONNX
+  void fill_onnx_node(onnx::GraphProto& graph) const override;
+#endif // LBANN_HAS_ONNX
 
 protected:
 
@@ -239,6 +241,25 @@ class split_layer : public data_type_layer<TensorDataType> {
 #endif // LBANN_HAS_DISTCONV
 };
 
+#ifdef LBANN_HAS_ONNX
+template <typename T, data_layout L, El::Device D>
+void split_layer<T, L, D>::fill_onnx_node(onnx::GraphProto& graph) const
+{
+  const auto& parent = this->get_parent_layer();
+  const size_t idx_in_parent = parent.find_child_layer_index(*this);
+  for (auto const* child : this->get_child_layers()) { // one Identity per child
+    auto* node = graph.add_node();
+    node->add_input(parent.get_name() + "_" + std::to_string(idx_in_parent));
+    size_t idx = this->find_child_layer_index(*child);
+    node->add_output(this->get_name() + "_" + std::to_string(idx));
+    node->set_name(this->get_name() + "_" + std::to_string(idx)); // "_": unique
+    node->set_op_type("Identity");
+    node->set_domain("");
+    node->set_doc_string(this->get_type());
+  }
+}
+#endif // LBANN_HAS_ONNX
+
 #ifdef LBANN_HAS_DISTCONV
 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
 split_distconv_adapter<TensorDataType, T_layout, Dev>&
diff --git a/src/callbacks/CMakeLists.txt b/src/callbacks/CMakeLists.txt
index 8f64509d1c9..2ac107d64b3 100644
--- a/src/callbacks/CMakeLists.txt
+++ b/src/callbacks/CMakeLists.txt
@@ -47,6 +47,10 @@ set_full_path(THIS_DIR_SOURCES
   variable_minibatch.cpp
 )
 
+if(LBANN_HAS_ONNX)
+  list(APPEND THIS_DIR_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/export_onnx.cpp)
+endif ()
+
 # Propagate the files up the tree
 set(SOURCES "${SOURCES}" "${THIS_DIR_SOURCES}" PARENT_SCOPE)
 set(GPU_SOURCES "${GPU_SOURCES}" "${THIS_DIR_CU_SOURCES}" PARENT_SCOPE)
diff --git a/src/callbacks/export_onnx.cpp b/src/callbacks/export_onnx.cpp
new file mode 100644
index 00000000000..9f6f63b7481
--- /dev/null
+++ b/src/callbacks/export_onnx.cpp
@@ -0,0 +1,104 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+//
+// export_onnx .hpp .cpp - Exports trained model to onnx format
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/callbacks/export_onnx.hpp"
+
+#include "lbann/layers/io/input_layer.hpp"
+#include "lbann/proto/helpers.hpp"
+#include "lbann/utils/factory.hpp"
+#include "lbann/utils/summary_impl.hpp"
+
+#include <callbacks.pb.h>
+
+#include <fstream>
+#include <iostream>
+#include <string>
+
+
+namespace lbann {
+namespace callback {
+
+export_onnx::export_onnx(bool print_debug_string,
+                         std::string output_file)
+  : callback_base(/*batch_interval=*/1),
+    m_print_debug_string(print_debug_string),
+    m_output_file(output_file)
+{}
+
+void export_onnx::on_setup_end(model* m)
+{
+  mp_.set_ir_version(7);
+  auto* opset = mp_.add_opset_import();
+  // The empty string ("") domain indicates the operators defined
+  // as part of the ONNX specification; other domains correspond
+  // to operator sets of other vendors (e.g., they can be used to
+  // provide vendor-specific extensions to ONNX)
+  opset->set_domain("");
+  opset->set_version(11);
+
+  mp_.set_producer_name("LBANN");
+  mp_.set_producer_version(LBANN_MAKE_STR(LBANN_VERSION));
+  mp_.set_domain("lbann/LLNL/com.github");
+  mp_.set_model_version(1);
+  mp_.set_doc_string("Livermore Big Artificial Neural Network");
+}
+
+void export_onnx::on_train_begin(model* m)
+{
+  // Build the graph: one fill_onnx_node() call per layer of the model.
+  auto* gp = mp_.mutable_graph();
+  gp->set_name(m->get_name());
+
+  auto const layers = m->get_layers();
+  for (auto const* layer : layers) {
+    layer->fill_onnx_node(*gp);
+  }
+  gp->set_doc_string(m->get_name());
+
+  // Only rank 0 of the trainer writes the file (binary protobuf data).
+  if (m->get_comm()->get_rank_in_trainer() == 0) {
+    std::ofstream onnx_out(m_output_file, std::ios::binary);
+    if (!mp_.SerializeToOstream(&onnx_out))
+      LBANN_ERROR("Failed to write ONNX model to \"", m_output_file, "\"");
+    if(m_print_debug_string)
+      std::cout << mp_.DebugString() << std::endl;
+  }
+}
+
+std::unique_ptr<callback_base>
+build_export_onnx_callback_from_pbuf(
+  const google::protobuf::Message& proto_msg,
+  const std::shared_ptr<lbann_summary>&) {
+  const auto& params =
+    dynamic_cast<const lbann_data::Callback::CallbackExportOnnx&>(proto_msg);
+  return make_unique<export_onnx>(
+    params.print_debug_string(),
+    params.output_file());
+}
+}// namespace callback
+}// namespace lbann
diff --git a/src/callbacks/unit_test/CMakeLists.txt b/src/callbacks/unit_test/CMakeLists.txt
index de585b1aeb4..2d871cd27cc 100644
--- a/src/callbacks/unit_test/CMakeLists.txt
+++ b/src/callbacks/unit_test/CMakeLists.txt
@@ -2,6 +2,10 @@ set_full_path(THIS_DIR_MPI_CATCH2_TEST_FILES
   print_statistics_test.cpp
   )
 
+if(LBANN_HAS_ONNX)
+    list(APPEND THIS_DIR_MPI_CATCH2_TEST_FILES ${CMAKE_CURRENT_SOURCE_DIR}/export_onnx_test.cpp)
+endif ()
+
 set(LBANN_MPI_CATCH2_TEST_FILES
   "${LBANN_MPI_CATCH2_TEST_FILES}"
   "${THIS_DIR_MPI_CATCH2_TEST_FILES}" PARENT_SCOPE)
diff --git a/src/callbacks/unit_test/export_onnx_test.cpp b/src/callbacks/unit_test/export_onnx_test.cpp
new file mode 100644
index 00000000000..ca86b0465e0
--- /dev/null
+++ b/src/callbacks/unit_test/export_onnx_test.cpp
@@ -0,0 +1,57 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <catch2/catch.hpp>
+#include "TestHelpers.hpp"
+#include "MPITestHelpers.hpp"
+
+// The code being tested
+#include <lbann/callbacks/export_onnx.hpp>
+
+#include "lbann/callbacks/callback.hpp"
+#include <google/protobuf/message.h>
+#include <lbann/base.hpp>
+
+#include <onnx/onnx_pb.h>
+
+#include <iostream>
+#include <memory>
+
+
+using unit_test::utilities::IsValidPtr;
+TEST_CASE("Serializing \"export onnx\" callback",
+          "[mpi][callback][serialize][onnx]")
+{
+  using CallbackType = lbann::callback::export_onnx;
+
+  auto& world_comm = unit_test::utilities::current_world_comm();
+  auto const& g = world_comm.get_trainer_grid();
+  lbann::utils::grid_manager mgr(g);
+
+  // Pass the arguments explicitly; empty parentheses would declare a function.
+  CallbackType callback(/*print_debug_string=*/false, "lbann_output.onnx");
+  // FIXME: Testing if onnx is defined? How to do this?
+}
diff --git a/src/layers/io/input_layer.cpp b/src/layers/io/input_layer.cpp
index 7cc9e9c9fd1..0754ff5aa56 100644
--- a/src/layers/io/input_layer.cpp
+++ b/src/layers/io/input_layer.cpp
@@ -154,6 +154,30 @@ get_data_dims(DataReaderMetaData& dr_metadata, int child_index) const {
   return std::vector<int>(1, 0);
 }
 
+#ifdef LBANN_HAS_ONNX
+template <typename T, data_layout L, El::Device D>
+void input_layer<T,L,D>::fill_onnx_node(onnx::GraphProto& graph) const
+{
+  // Declare one graph input per child layer; no ONNX node is emitted.
+  for (auto const* child : this->get_child_layers()) {
+    auto idx = this->find_child_layer_index(*child);
+    auto* input = graph.add_input();
+    input->set_name(this->get_name() + "_" + std::to_string(idx));
+    auto* tensor_type = input->mutable_type()->mutable_tensor_type();
+    // FIXME: element type is hard-coded to float instead of following T.
+    tensor_type->set_elem_type(onnx::TensorProto::FLOAT);
+
+    // Symbolic "batch" dimension first, then the dims of output idx.
+    auto* shape = tensor_type->mutable_shape();
+    shape->add_dim()->set_dim_param("batch");
+    for (auto const& dim : this->get_output_dims(idx)) {
+      shape->add_dim()->set_dim_value(dim);
+    }
+    input->set_doc_string("Input layer info");
+  }
+}
+#endif // LBANN_HAS_ONNX
+
 #ifdef LBANN_HAS_DISTCONV
 template <typename TensorDataType,
           data_layout T_layout, El::Device Dev>
diff --git a/src/layers/layer.cpp b/src/layers/layer.cpp
index e8a83321cb6..95e50606404 100644
--- a/src/layers/layer.cpp
+++ b/src/layers/layer.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -527,6 +527,29 @@ void Layer::write_proto(lbann_data::Layer* proto) const {
     get_weights(i).write_proto(weight_proto);
   }
 }
+#ifdef LBANN_HAS_ONNX
+void Layer::fill_onnx_node(onnx::GraphProto& graph) const {
+  auto* node = graph.add_node(); // default export: a single node for this layer
+  for(auto const* parent : this->get_parent_layers()) { // one input per parent
+    size_t idx = parent->find_child_layer_index(*this);
+    node->add_input(parent->get_name() + "_" + std::to_string(idx));
+  }
+  for(auto const* child : this->get_child_layers()) { // one output per child
+    size_t idx = this->find_child_layer_index(*child);
+    node->add_output(this->get_name() + "_" + std::to_string(idx));
+  }
+  node->set_name(this->get_name());
+  node->set_op_type(this->get_onnx_op_type()); // base version reports an error
+  node->set_domain(""); // "" = operators from the ONNX specification itself
+  node->set_doc_string(this->get_type());
+}
+
+std::string Layer::get_onnx_op_type() const {
+  LBANN_ERROR( "ONNX export is not supported for ", this->get_type(),
+               " layer \"",this->get_name(),"\"");
+  return ""; // presumably unreachable (LBANN_ERROR is documented to throw)
+}
+#endif // LBANN_HAS_ONNX
 
 const Layer& Layer::get_parent_layer(size_t index) const {
   if (index >= m_parent_layers.size()) {
diff --git a/src/proto/callbacks.proto b/src/proto/callbacks.proto
index bc4f962026e..7fea821b6a6 100644
--- a/src/proto/callbacks.proto
+++ b/src/proto/callbacks.proto
@@ -82,6 +82,7 @@ message Callback {
     CallbackPerturbLearningRate perturb_learning_rate = 50;
     CallbackComputeModelSize compute_model_size = 51;
     CallbackPerturbWeights perturb_weights = 52;
+    CallbackExportOnnx export_onnx = 53;
   }
 
   message CallbackLTFB {
@@ -417,4 +418,10 @@ message Callback {
     string output_name = 5;
     int64 batch_interval = 6;
   }
+
+  /** @brief Export trained model in onnx format */
+  message CallbackExportOnnx {
+    bool print_debug_string = 1; // print debug string to stdout
+    string output_file = 2; // name of onnx output file
+  }
 }
diff --git a/src/proto/factories/callback_factory.cpp b/src/proto/factories/callback_factory.cpp
index a064ab2db02..7d3a476743c 100644
--- a/src/proto/factories/callback_factory.cpp
+++ b/src/proto/factories/callback_factory.cpp
@@ -44,6 +44,9 @@
 #include "lbann/callbacks/dump_outputs.hpp"
 #include "lbann/callbacks/dump_weights.hpp"
 #include "lbann/callbacks/early_stopping.hpp"
+#ifdef LBANN_HAS_ONNX
+#include "lbann/callbacks/export_onnx.hpp"
+#endif // LBANN_HAS_ONNX
 #include "lbann/callbacks/gpu_memory_usage.hpp"
 #include "lbann/callbacks/hang.hpp"
 #include "lbann/callbacks/imcomm.hpp"
@@ -142,6 +145,10 @@ void register_default_builders(factory_type& factory)
                            build_dump_weights_callback_from_pbuf);
   factory.register_builder("CallbackEarlyStopping",
                            build_early_stopping_callback_from_pbuf);
+#ifdef LBANN_HAS_ONNX
+  factory.register_builder("CallbackExportOnnx",
+                           build_export_onnx_callback_from_pbuf);
+#endif // LBANN_HAS_ONNX
   factory.register_builder("CallbackGPUMemoryUsage",
                            build_gpu_memory_usage_callback_from_pbuf);
   factory.register_builder("CallbackHang",
diff --git a/src/utils/unit_test/CMakeLists.txt b/src/utils/unit_test/CMakeLists.txt
index e2a0d3593d7..c9b121eec20 100644
--- a/src/utils/unit_test/CMakeLists.txt
+++ b/src/utils/unit_test/CMakeLists.txt
@@ -14,7 +14,6 @@ set_full_path(THIS_DIR_SEQ_CATCH2_TEST_FILES
   serialize_matrix_test.cpp
   timer_test.cpp
   type_erased_matrix_test.cpp
-
   stubs/preset_env_accessor.hpp
   stubs/preset_env_accessor.cpp
   )

From 2458baa9542a234521818c77abece67908b27218 Mon Sep 17 00:00:00 2001
From: Brian Van Essen <vanessen1@llnl.gov>
Date: Thu, 14 Oct 2021 09:06:00 -0700
Subject: [PATCH 20/37] Move state from data reader to data coordinator (#1744)

* Mini-batch size is now passed into the data reader fetch functions
from the data coordinator, rather than having the data reader
calculate what is the current mini-batch size.

* Removed jag_partitioned field since it was deprecated by sample lists

* Removed the set_master and is_master functions and the m_master field, since that information is available from get_comm()->am_world_master()

* Removed the cached value for rank, which really should be rank_in_trainer and was labeled as rank in model

* Switched data readers over to using the global get_trainer() function call to find the trainer

* Updated data readers to use get_rank_in_trainer rather than local field

* Fixed typo

* Updated tests to pass number of samples
---
 .../data_readers/compound_data_reader.hpp     | 14 ----
 include/lbann/data_readers/data_reader.hpp    | 67 ++----------------
 .../data_readers/data_reader_jag_conduit.hpp  |  2 -
 .../data_reader_sample_list_impl.hpp          |  8 +--
 src/callbacks/debug_io.cpp                    |  2 +-
 .../buffered_data_coordinator.cpp             | 10 ++-
 src/data_coordinator/data_coordinator.cpp     |  1 -
 src/data_readers/data_reader.cpp              | 68 +++----------------
 src/data_readers/data_reader_HDF5.cpp         | 16 ++---
 src/data_readers/data_reader_csv.cpp          |  2 +-
 src/data_readers/data_reader_image.cpp        | 16 ++---
 src/data_readers/data_reader_imagenet.cpp     |  2 +-
 src/data_readers/data_reader_jag_conduit.cpp  | 31 +++------
 .../data_reader_merge_features.cpp            |  2 +-
 src/data_readers/data_reader_mnist.cpp        |  6 +-
 .../data_reader_npz_ras_lipid.cpp             | 20 +++---
 .../data_reader_numpy_npz_conduit.cpp         | 20 +++---
 src/data_readers/data_reader_python.cpp       |  2 +-
 src/data_readers/data_reader_smiles.cpp       | 22 +++---
 .../data_reader_HDF5_hrrl_public_api.cpp      |  1 -
 .../data_reader_synthetic_test_public_api.cpp | 15 ++--
 src/proto/proto_common.cpp                    |  2 -
 src/trainers/trainer.cpp                      |  3 -
 23 files changed, 98 insertions(+), 234 deletions(-)

diff --git a/include/lbann/data_readers/compound_data_reader.hpp b/include/lbann/data_readers/compound_data_reader.hpp
index e34d6bae62c..25530217384 100644
--- a/include/lbann/data_readers/compound_data_reader.hpp
+++ b/include/lbann/data_readers/compound_data_reader.hpp
@@ -92,20 +92,6 @@ class generic_compound_data_reader : public generic_data_reader {
     }
   }
 
-  void set_master(bool m) override {
-    generic_data_reader::set_master(m);
-    for (auto&& reader : m_data_readers) {
-      reader->set_master(m);
-    }
-  }
-
-  void set_rank(int rank) override {
-    generic_data_reader::set_rank(rank);
-    for (auto&& reader : m_data_readers) {
-      reader->set_rank(rank);
-    }
-  }
-
   /// needed to support data_store_merge_samples
   std::vector<generic_data_reader*> & get_data_readers() {
     return m_data_readers;
diff --git a/include/lbann/data_readers/data_reader.hpp b/include/lbann/data_readers/data_reader.hpp
index 332f5753257..8f804b696b8 100644
--- a/include/lbann/data_readers/data_reader.hpp
+++ b/include/lbann/data_readers/data_reader.hpp
@@ -70,9 +70,6 @@ class generic_data_reader {
  public:
   using unused_index_map_t = std::map<execution_mode,std::vector<int>>;
 
- #define JAG_NOOP_VOID if (m_jag_partitioned) { return; }
- #define JAG_NOOP_INT if (m_jag_partitioned) { return 0; }
-
   /**
    * ctor
    */
@@ -97,7 +94,6 @@ class generic_data_reader {
       m_global_last_mini_batch_size(0),
       m_world_master_mini_batch_adjustment(0),
       m_num_parallel_readers(0),
-      m_rank_in_model(0),
       m_max_files_to_load(0),
       m_file_dir(""),
       m_data_sample_list(""),
@@ -106,14 +102,10 @@ class generic_data_reader {
       m_shuffle(shuffle),
       m_absolute_sample_count(0),
       m_use_percent(1.0),
-      m_master(false),
-      m_gan_labelling(false), // default, not GAN
-      m_gan_label_value(
-        0), // If GAN, default for fake label, discriminator model
+      m_gan_labelling(false), //default, not GAN
+      m_gan_label_value(0),  //If GAN, default for fake label, discriminator model
       m_io_thread_pool(nullptr),
-      m_jag_partitioned(false),
       m_keep_sample_order(false),
-      m_trainer(nullptr),
       m_issue_warning(true)
   {
     // By default only support fetching input samples
@@ -131,7 +123,6 @@ class generic_data_reader {
   /// set the comm object
   void set_comm(lbann_comm *comm) {
     m_comm = comm;
-    set_master(comm->am_world_master());
   }
 
   /// returns a (possibly nullptr) to comm
@@ -306,7 +297,7 @@ class generic_data_reader {
 
   /** @brief Fetch a mini-batch worth of data, including samples, labels, responses (as appropriate) */
   int fetch(std::map<data_field_type, CPUMat*>& input_buffers,
-            El::Matrix<El::Int>& indices_fetched);
+            El::Matrix<El::Int>& indices_fetched, size_t mb_size);
 
   /** @brief Check to see if the data reader supports this specific data field
    */
@@ -448,7 +439,6 @@ class generic_data_reader {
   }
   /// Set the mini batch size across all models (global)
   void set_global_mini_batch_size(const int s) {
-    JAG_NOOP_VOID
     m_global_mini_batch_size = s;
   }
   /// Return the mini_batch_size across all models (global)
@@ -457,7 +447,6 @@ class generic_data_reader {
   }
   /// Set the mini batch stride
   void set_stride_to_next_mini_batch(const int s) {
-    JAG_NOOP_VOID
     m_stride_to_next_mini_batch = s;
   }
   /// Return the mini batch stride.
@@ -466,7 +455,6 @@ class generic_data_reader {
   }
   /// Set the sample stride
   void set_sample_stride(const int s) {
-    JAG_NOOP_VOID
     m_sample_stride = s;
   }
   /// Return the sample stride.
@@ -483,7 +471,6 @@ class generic_data_reader {
   }
   /// Return the base offset.
   virtual void set_base_offset(const int s) {
-    JAG_NOOP_VOID
     m_base_offset = s;
   }
   /// Return the base offset.
@@ -492,7 +479,6 @@ class generic_data_reader {
   }
   /// Set the model offset
   void set_model_offset(const int s) {
-    JAG_NOOP_VOID
     m_model_offset = s;
   }
   /// Return the model offset.
@@ -501,7 +487,6 @@ class generic_data_reader {
   }
   /// Set the last mini batch size
   void set_last_mini_batch_size(const int s) {
-    JAG_NOOP_VOID
     m_last_mini_batch_size = s;
   }
   /// Return the last mini batch size
@@ -510,7 +495,6 @@ class generic_data_reader {
   }
   /// Set the last mini batch size across all models (global)
   void set_global_last_mini_batch_size(const int s) {
-    JAG_NOOP_VOID
     m_global_last_mini_batch_size = s;
   }
   /// Return the last mini batch size across all models (global)
@@ -519,7 +503,6 @@ class generic_data_reader {
   }
   /// Set the world master mini batch adjustment (global)
   void set_world_master_mini_batch_adjustment(const int s) {
-    JAG_NOOP_VOID
     m_world_master_mini_batch_adjustment = s;
   }
   /// Return the world master mini batch adjustment (global)
@@ -528,7 +511,6 @@ class generic_data_reader {
   }
   /// Set the last mini batch stride
   void set_stride_to_last_mini_batch(const int s) {
-    JAG_NOOP_VOID
     m_stride_to_last_mini_batch = s;
   }
   /// Return the last mini batch stride
@@ -616,26 +598,6 @@ class generic_data_reader {
     return  m_current_mini_batch_idx;
   }
 
-  /// only the master may write to cerr or cout; primarily for use in debugging during development
-  virtual void set_master(bool m) {
-    m_master = m;
-  }
-
-  /// only the master may write to cerr or cout; primarily for use in debugging during development
-  bool is_master() const {
-    return m_master;
-  }
-
-  /// Allow the reader to know where it is in the model hierarchy
-  virtual void set_rank(int rank) {
-    m_rank_in_model = rank;
-  }
-
-  /// Allow the reader to know where it is in the model hierarchy
-  int get_rank() const {
-    return m_rank_in_model;
-  }
-
   /**
    * Optionally resizes the shuffled indices based on the data reader
    * prototext settings: absolute_sample_count, percent_of_data_to_use.
@@ -714,12 +676,9 @@ class generic_data_reader {
 
   virtual bool priming_data_store() const;
 
-  void set_trainer(trainer *t) { m_trainer = t; }
-
-  trainer& get_trainer() const {
-    if(m_trainer == nullptr) { LBANN_ERROR("get_trainer called with nullptr"); }
-    return *m_trainer;
-  }
+  /// experimental; used to ensure all readers for jag_conduit_hdf5
+  /// have identical shuffled indices
+  virtual void post_update() {}
 
   /** Set the transform pipeline this data reader will use. */
   void set_transform_pipeline(transform::transform_pipeline&& tp) {
@@ -836,6 +795,7 @@ class generic_data_reader {
   /// Shuffle indices and profide a random number generator
   virtual void shuffle_indices(rng_gen& gen);
 
+public:
   int m_mini_batch_size;
   int m_current_pos;
   /// Batch Stride is typically batch_size, but may be a multiple of batch size if there are multiple readers
@@ -873,7 +833,6 @@ class generic_data_reader {
 
   int m_num_parallel_readers; /// How many parallel readers are being used
 
-  int m_rank_in_model;  /// What is the rank of the data reader within a given model
   size_t m_max_files_to_load;
   std::string m_file_dir;
   std::string m_local_file_dir;
@@ -887,8 +846,6 @@ class generic_data_reader {
   int m_first_n;
   std::string m_role;
 
-  bool m_master;
-
   /** @brief Print the return values from various get_X methods to file
    *
    * For use in unit testing. Only the master prints.
@@ -928,20 +885,10 @@ class generic_data_reader {
 
   observer_ptr<thread_pool> m_io_thread_pool;
 
-  /// special handling for 1B jag; each reader
-  /// owns a unique subset of the data
-  bool m_jag_partitioned;
-
   /** Whether to keep the order of loaded samples same as it is in the
    *  file to make testing and validation easier */
   bool m_keep_sample_order;
 
-  /// called by fetch_data a single time if m_jag_partitioned = true;
-  /// this sets various member variables (num_iterations, m_reset_mini_batch_index,
-  /// etc.
-  void set_jag_variables(int mb_size);
-  trainer *m_trainer;
-
   /** Transform pipeline for preprocessing data. */
   transform::transform_pipeline m_transform_pipeline;
 
diff --git a/include/lbann/data_readers/data_reader_jag_conduit.hpp b/include/lbann/data_readers/data_reader_jag_conduit.hpp
index a0bd786b23d..6c0b1fc8452 100644
--- a/include/lbann/data_readers/data_reader_jag_conduit.hpp
+++ b/include/lbann/data_readers/data_reader_jag_conduit.hpp
@@ -171,8 +171,6 @@ class data_reader_jag_conduit : public generic_data_reader {
   bool has_list_per_model() const override { return m_list_per_model; }
   bool has_list_per_trainer() const override { return m_list_per_trainer; }
 
-
-
   /// Return the number of measurement views
   unsigned int get_num_img_srcs() const;
   /// Return the linearized size of an image
diff --git a/include/lbann/data_readers/data_reader_sample_list_impl.hpp b/include/lbann/data_readers/data_reader_sample_list_impl.hpp
index fa8c0c33029..748d90ac48e 100644
--- a/include/lbann/data_readers/data_reader_sample_list_impl.hpp
+++ b/include/lbann/data_readers/data_reader_sample_list_impl.hpp
@@ -85,7 +85,7 @@ void data_reader_sample_list<SampleListT>::shuffle_indices(rng_gen& gen)
 template <typename SampleListT>
 void data_reader_sample_list<SampleListT>::load()
 {
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "starting data_reader_sample_list::load()\n";
   }
   const std::string sample_list_file = get_data_sample_list();
@@ -129,7 +129,7 @@ void data_reader_sample_list<SampleListT>::load_list_of_samples(
   else {
     m_sample_list.load(sample_list_file, *(this->m_comm), true);
   }
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "Time to load sample list '" << sample_list_file
               << "': " << get_time() - tm1 << std::endl;
   }
@@ -138,7 +138,7 @@ void data_reader_sample_list<SampleListT>::load_list_of_samples(
   double tm3 = get_time();
   m_sample_list.all_gather_packed_lists(*m_comm);
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "Time to gather sample list '" << sample_list_file
               << "': " << get_time() - tm3 << std::endl;
   }
@@ -160,7 +160,7 @@ void data_reader_sample_list<SampleListT>::load_list_of_samples_from_archive(
   iarchive(m_sample_list); // Read the data from the archive
   double tm2 = get_time();
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "Time to load sample list from archive: " << tm2 - tm1
               << std::endl;
   }
diff --git a/src/callbacks/debug_io.cpp b/src/callbacks/debug_io.cpp
index d225966179d..d3891955941 100644
--- a/src/callbacks/debug_io.cpp
+++ b/src/callbacks/debug_io.cpp
@@ -104,7 +104,7 @@ void debug_io::print_phase_start(model *m, execution_mode mode) {
   generic_data_reader* data_reader = dc.get_data_reader(mode);
   const auto& step = c.get_step();
 
-  if(data_reader->get_rank() < data_reader->get_num_parallel_readers()) {
+  if(m->get_comm()->get_rank_in_trainer() < data_reader->get_num_parallel_readers()) {
     std::cout << "[" << m->get_comm()->get_trainer_rank()
               << "." << m->get_comm()->get_rank_in_trainer()
               << "] @" << 0 << "." << step
diff --git a/src/data_coordinator/buffered_data_coordinator.cpp b/src/data_coordinator/buffered_data_coordinator.cpp
index badd3fe813b..43ec0677562 100644
--- a/src/data_coordinator/buffered_data_coordinator.cpp
+++ b/src/data_coordinator/buffered_data_coordinator.cpp
@@ -143,8 +143,16 @@ int buffered_data_coordinator<TensorDataType>::fetch_to_local_matrix(data_buffer
     for(auto& b : buf.m_input_buffers) {
       local_input_buffers[b.first] = static_cast<CPUMat*>(&(b.second->Matrix()));
     }
+
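+    // Compute the size of the current mini-batch: the strided index count up to
+    // the end of the loaded batch or index list, capped by the sample buffer width.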
+    int loaded_batch_size = dr->get_loaded_mini_batch_size();
+    const int end_pos = std::min(static_cast<size_t>(dr->m_current_pos+loaded_batch_size), dr->m_shuffled_indices.size());
+    const int mb_size = std::min(El::Int{((end_pos - dr->m_current_pos) + dr->m_sample_stride - 1) / dr->m_sample_stride},
+                                 local_input_buffers[INPUT_DATA_TYPE_SAMPLES]->Width());
+
     /** @brief Each rank will fetch a mini-batch worth of data into it's buffer */
-    buf.m_num_samples_fetched = dr->fetch(local_input_buffers, buf.m_indices_fetched_per_mb);
+    buf.m_num_samples_fetched = dr->fetch(local_input_buffers, buf.m_indices_fetched_per_mb, mb_size);
 
     bool data_valid = (buf.m_num_samples_fetched > 0);
     if(data_valid) {
diff --git a/src/data_coordinator/data_coordinator.cpp b/src/data_coordinator/data_coordinator.cpp
index 61251019483..8b57915f908 100644
--- a/src/data_coordinator/data_coordinator.cpp
+++ b/src/data_coordinator/data_coordinator.cpp
@@ -60,7 +60,6 @@ void data_coordinator::setup(thread_pool& io_thread_pool, int max_mini_batch_siz
     if (!dr.second) continue;
     dr.second->setup(m_io_thread_pool->get_num_threads(),
                      m_io_thread_pool);
-    dr.second->set_rank(m_comm->get_rank_in_trainer());
   }
 
   /** Calculate how many iterations are required for training, testing,
diff --git a/src/data_readers/data_reader.cpp b/src/data_readers/data_reader.cpp
index b3f7070324d..c8723b6212e 100644
--- a/src/data_readers/data_reader.cpp
+++ b/src/data_readers/data_reader.cpp
@@ -85,7 +85,8 @@ void generic_data_reader::setup(int num_io_threads, observer_ptr<thread_pool> io
 
 int lbann::generic_data_reader::fetch(
   std::map<data_field_type, CPUMat*>& input_buffers,
-  El::Matrix<El::Int>& indices_fetched)
+  El::Matrix<El::Int>& indices_fetched,
+  size_t mb_size)
 {
   // Check to make sure that a valid map was passed
   if (input_buffers.empty()) {
@@ -121,8 +122,8 @@ int lbann::generic_data_reader::fetch(
 
 #ifdef DEBUG
   if (m_current_pos == 0) {
-    if (is_master()) {
-      std::cout << "role: " << get_role() << " model: " << m_trainer->get_name()
+    if (get_comm()->am_world_master()) {
+      std::cout << "role: " << get_role() << " model: " << get_trainer().get_name()
                 << " shuffled indices: ";
       for (size_t j=0; j<15; j++) {
         std::cout << m_shuffled_indices[j] << " ";
@@ -134,12 +135,6 @@ int lbann::generic_data_reader::fetch(
 
   int loaded_batch_size = get_loaded_mini_batch_size();
 
-  const int end_pos = std::min(static_cast<size_t>(m_current_pos+loaded_batch_size), m_shuffled_indices.size());
-  const int mb_size =
-    std::min(El::Int{((end_pos - m_current_pos) + m_sample_stride - 1) /
-                     m_sample_stride},
-             buffer_width);
-
   if(!position_valid()) {
     if(position_is_overrun()) {
       return 0;
@@ -156,12 +151,6 @@ int lbann::generic_data_reader::fetch(
     preprocess_data_source(t);
   }
 
-  static bool fix_jag = true;
-  if (m_jag_partitioned && fix_jag) {
-    fix_jag = false;
-    set_jag_variables(mb_size);
-  }
-
   // BVE FIXME - for the time being certain data fields, such as the
   // labels have to be zeroed out because they will typically only
   // set the single index corresponding to the categorical value.
@@ -288,32 +277,6 @@ bool lbann::generic_data_reader::fetch_data_block(
   return true;
 }
 
-void lbann::generic_data_reader::set_jag_variables(int mb_size) {
-  // all min_batches have the same number of indices;
-  // this probably causes a few indices to be discarded,
-  // but with 1B indices, who cares?
-  int mb_max = m_comm->trainer_allreduce<int>(mb_size, El::mpi::MAX);
-  m_num_iterations_per_epoch = m_shuffled_indices.size() / mb_max;
-
-  m_last_mini_batch_size = m_mini_batch_size;
-  m_global_mini_batch_size = m_mini_batch_size;
-  m_global_last_mini_batch_size = m_mini_batch_size;
-
-  m_reset_mini_batch_index = 0;
-  m_loaded_mini_batch_idx = 0;
-  m_current_mini_batch_idx = 0;
-
-  m_stride_to_next_mini_batch = mb_size;
-  m_stride_to_last_mini_batch = mb_size;
-
-  m_base_offset = 0;
-  m_model_offset = 0;
-  m_sample_stride = 1;
-  m_iteration_stride = 1;
-
-  m_world_master_mini_batch_adjustment = 0;
-}
-
 bool generic_data_reader::update(bool is_active_reader) {
   bool reader_not_done = true; // BVE The sense of this should be fixed
   m_current_mini_batch_idx++;
@@ -330,7 +293,7 @@ bool generic_data_reader::update(bool is_active_reader) {
   }
   if (m_current_mini_batch_idx == m_num_iterations_per_epoch) {
     // for working with 1B jag samples, we may not process all the data
-    if ((get_rank() < m_num_parallel_readers) && (m_current_pos < (int)m_shuffled_indices.size()) && !m_jag_partitioned) {
+    if ((m_comm->get_rank_in_trainer() < m_num_parallel_readers) && (m_current_pos < (int)m_shuffled_indices.size())) {
       throw lbann_exception(
         std::string{} + __FILE__ + " " + std::to_string(__LINE__)
         + " :: generic data reader update error: the epoch is complete,"
@@ -444,12 +407,6 @@ size_t generic_data_reader::get_num_indices_to_use() const {
 }
 
 void generic_data_reader::resize_shuffled_indices() {
-  // ensure that all readers have the same number of indices
-  if (m_jag_partitioned) {
-    size_t n = m_comm->trainer_allreduce<size_t>(m_shuffled_indices.size(), El::mpi::MIN);
-    m_shuffled_indices.resize(n);
-  }
-
   size_t num_indices = get_num_indices_to_use();
   shuffle_indices();
   m_shuffled_indices.resize(num_indices);
@@ -706,7 +663,7 @@ void generic_data_reader::instantiate_data_store() {
     return;
   }
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "\nUSING DATA_STORE\n\n";
   }
   m_data_store = new data_store_conduit(this);  // *data_store_conduit
@@ -747,7 +704,7 @@ bool generic_data_reader::data_store_active() const {
     return true;
   }
 
-  const auto& c = static_cast<const sgd_execution_context&>(m_trainer->get_data_coordinator().get_execution_context());
+  const auto& c = static_cast<const sgd_execution_context&>(get_trainer().get_data_coordinator().get_execution_context());
   /// Use the data store for all modes except testing
   /// i.e. training, validation, tournament
   return (m_data_store != nullptr
@@ -758,7 +715,7 @@ bool generic_data_reader::data_store_active() const {
 }
 
 bool generic_data_reader::priming_data_store() const {
-  const auto& c = static_cast<const sgd_execution_context&>(m_trainer->get_data_coordinator().get_execution_context());
+  const auto& c = static_cast<const sgd_execution_context&>(get_trainer().get_data_coordinator().get_execution_context());
   if (m_data_store != nullptr && m_data_store->is_fully_loaded()) {
     return false;
   }
@@ -786,13 +743,6 @@ void generic_data_reader::set_mini_batch_size(const int s) {
 
 void generic_data_reader::set_role(std::string role) {
   m_role = role;
-  if (global_argument_parser().get<bool>(JAG_PARTITIONED) &&
-      get_role() == "train") {
-    m_jag_partitioned = true;
-    if (is_master()) {
-      std::cout << "USING JAG DATA PARTITIONING\n";
-    }
-  }
 }
 
 void generic_data_reader::preload_data_store() {
@@ -826,7 +776,7 @@ void generic_data_reader::preload_data_store() {
 }
 
 void generic_data_reader::print_get_methods(const std::string filename) {
-  if (!is_master()) {
+  if (!get_comm()->am_world_master()) {
     return;
   }
   std::ofstream out(filename.c_str());
diff --git a/src/data_readers/data_reader_HDF5.cpp b/src/data_readers/data_reader_HDF5.cpp
index 01ad0de403c..65472d29ab2 100644
--- a/src/data_readers/data_reader_HDF5.cpp
+++ b/src/data_readers/data_reader_HDF5.cpp
@@ -209,7 +209,7 @@ void hdf5_data_reader::copy_members(const hdf5_data_reader& rhs)
 
 void hdf5_data_reader::load()
 {
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "hdf5_data_reader - starting load" << std::endl;
   }
   double tm1 = get_time();
@@ -231,7 +231,7 @@ void hdf5_data_reader::load()
 
   // Load the sample list(s)
   data_reader_sample_list::load();
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "time to load sample list: " << get_time() - tm11 << std::endl;
   }
 
@@ -257,14 +257,14 @@ void hdf5_data_reader::load()
   load_schema(get_experiment_schema_filename(), m_experiment_schema);
   parse_schemas();
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "time to load and parse the schemas: " << get_time() - tm11
               << " for role: " << get_role() << std::endl;
     std::cout << "hdf5_data_reader::load() time: " << (get_time() - tm1)
               << "; num samples: " << m_shuffled_indices.size() << std::endl;
   }
 
-  if (!arg_parser.get<bool>(QUIET) && is_master()) {
+  if (!arg_parser.get<bool>(QUIET) && get_comm()->am_world_master()) {
     print_metadata();
   }
 }
@@ -284,14 +284,14 @@ void hdf5_data_reader::load_schema(std::string filename, conduit::Node& schema)
 void hdf5_data_reader::do_preload_data_store()
 {
   double tm1 = get_time();
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "starting hdf5_data_reader::do_preload_data_store() for role: "
               << get_role() << std::endl;
   }
 
   for (size_t idx = 0; idx < m_shuffled_indices.size(); idx++) {
     int index = m_shuffled_indices[idx];
-    if (m_data_store->get_index_owner(index) != get_rank()) {
+    if (m_data_store->get_index_owner(index) != get_comm()->get_rank_in_trainer()) {
       continue;
     }
     try {
@@ -310,14 +310,14 @@ void hdf5_data_reader::do_preload_data_store()
 
   for (size_t idx = 0; idx < m_shuffled_indices.size(); idx++) {
     int index = m_shuffled_indices[idx];
-    if (m_data_store->get_index_owner(index) != get_rank()) {
+    if (m_data_store->get_index_owner(index) != get_comm()->get_rank_in_trainer()) {
       continue;
     }
     close_file(index); // data_reader_sample_list::close_file
   }
 
   size_t nn = m_data_store->get_num_global_indices();
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "loading data for role: " << get_role() << " took "
               << get_time() - tm1 << "s"
               << "num samples (local to this rank): "
diff --git a/src/data_readers/data_reader_csv.cpp b/src/data_readers/data_reader_csv.cpp
index f5a59108fc2..52058a7433f 100644
--- a/src/data_readers/data_reader_csv.cpp
+++ b/src/data_readers/data_reader_csv.cpp
@@ -250,7 +250,7 @@ void csv_reader::load() {
   //bcast the index vector
   m_comm->world_broadcast<long long>(0, index);
   m_num_samples = index.size() - 1;
-  if (m_master) std::cerr << "num samples: " << m_num_samples << "\n";
+  if (get_comm()->am_world_master()) std::cerr << "num samples: " << m_num_samples << "\n";
 
   m_index.reserve(index.size());
   for (auto t : index) {
diff --git a/src/data_readers/data_reader_image.cpp b/src/data_readers/data_reader_image.cpp
index d4cd515b667..f2636abab76 100644
--- a/src/data_readers/data_reader_image.cpp
+++ b/src/data_readers/data_reader_image.cpp
@@ -234,7 +234,7 @@ void image_data_reader::do_preload_data_store() {
 
   bool threaded = !arg_parser.get<bool>(DATA_STORE_NO_THREAD);
   if (threaded) {
-    if (is_master()) {
+    if (get_comm()->am_world_master()) {
       std::cout << "mode: data_store_thread\n";
     }
     std::shared_ptr<thread_pool> io_thread_pool =
@@ -265,7 +265,7 @@ void image_data_reader::do_preload_data_store() {
     io_thread_pool->finish_work_group();
   }
   else {
-    if (is_master()) {
+    if (get_comm()->am_world_master()) {
       std::cout << "mode: NOT data_store_thread\n";
     }
     for (size_t data_id=0; data_id<m_shuffled_indices.size(); data_id++) {
@@ -364,7 +364,7 @@ void image_data_reader::load_list_of_samples(const std::string sample_list_file)
 
   double tm2 = get_time();
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "Time to load sample list '" << sample_list_file << "': "
               << tm2 - tm1 << std::endl;
   }
@@ -374,7 +374,7 @@ void image_data_reader::load_list_of_samples(const std::string sample_list_file)
   set_file_dir(m_sample_list.get_samples_dirname());
 
   double tm3 = get_time();
-  if(is_master()) {
+  if(get_comm()->am_world_master()) {
     std::cout << "Time to gather sample list '" << sample_list_file << "': "
               << tm3 - tm2 << std::endl;
   }
@@ -395,7 +395,7 @@ void image_data_reader::load_list_of_samples_from_archive(const std::string& sam
   iarchive(m_sample_list); // Read the data from the archive
   double tm2 = get_time();
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "Time to load sample list from archive: " << tm2 - tm1 << std::endl;
   }
 }
@@ -482,7 +482,7 @@ void image_data_reader::gen_list_of_samples() {
 
   double tm2 = get_time();
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "Time to load sample list '" << sample_list_file << "': "
               << tm2 - tm1 << std::endl;
   }
@@ -491,7 +491,7 @@ void image_data_reader::gen_list_of_samples() {
   m_sample_list.all_gather_packed_lists(*m_comm);
 
   double tm3 = get_time();
-  if(is_master()) {
+  if(get_comm()->am_world_master()) {
     std::cout << "Time to gather sample list '" << sample_list_file << "': "
               << tm3 - tm2 << std::endl;
   }
@@ -576,7 +576,7 @@ void image_data_reader::load_labels(std::vector<char>& preloaded_buffer) {
     read_labels(is);
   }
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "Time to load label file '" << imageListFile << "': "
               << get_time() - tm1 << std::endl;
   }
diff --git a/src/data_readers/data_reader_imagenet.cpp b/src/data_readers/data_reader_imagenet.cpp
index fa926a6e8ba..c2f71f8da12 100644
--- a/src/data_readers/data_reader_imagenet.cpp
+++ b/src/data_readers/data_reader_imagenet.cpp
@@ -82,7 +82,7 @@ bool imagenet_reader::fetch_datum(CPUMat& X, int data_id, int mb_idx) {
         LBANN_ERROR("you shouldn't be here; please contact Dave Hysom");
       }
       if (m_issue_warning) {
-        if (is_master()) {
+        if (get_comm()->am_world_master()) {
           LBANN_WARNING("m_data_store != nullptr, but we are not retrivieving a node from the store; role: " + get_role() + "; this is probably OK for test mode, but may be an error for train or validate modes");
         }
       }
diff --git a/src/data_readers/data_reader_jag_conduit.cpp b/src/data_readers/data_reader_jag_conduit.cpp
index 5ddd44da5b7..48cec4f88d0 100644
--- a/src/data_readers/data_reader_jag_conduit.cpp
+++ b/src/data_readers/data_reader_jag_conduit.cpp
@@ -745,14 +745,14 @@ void data_reader_jag_conduit::load() {
     m_num_labels=2;
   }
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "JAG load GAN m_gan_labelling : label_value "
               << m_gan_labelling <<" : " << m_gan_label_value << std::endl;
   }
 
   m_shuffled_indices.clear();
 
-  if(is_master()) {
+  if(get_comm()->am_world_master()) {
     std::cout << "data_reader_jag_conduit - starting load" << std::endl;
   }
   const std::string sample_list_file = get_data_sample_list();
@@ -795,9 +795,6 @@ void data_reader_jag_conduit::do_preload_data_store() {
   conduit::Node work;
   const std::string key; // key = "" is intentional
 
-  /// @todo BVE FIXME this
-  m_rank_in_model = get_comm()->get_rank_in_trainer();
-
   auto& arg_parser = global_argument_parser();
   double tm1 = get_time();
   if (get_comm()->am_world_master() ||
@@ -807,7 +804,7 @@ void data_reader_jag_conduit::do_preload_data_store() {
 
   for (size_t idx=0; idx < m_shuffled_indices.size(); idx++) {
     int index = m_shuffled_indices[idx];
-    if(m_data_store->get_index_owner(index) != m_rank_in_model) {
+    if(m_data_store->get_index_owner(index) != get_comm()->get_rank_in_trainer()) {
       continue;
     }
     try {
@@ -832,7 +829,7 @@ void data_reader_jag_conduit::do_preload_data_store() {
   /// Once all of the data has been preloaded, close all of the file handles
   for (size_t idx=0; idx < m_shuffled_indices.size(); idx++) {
     int index = m_shuffled_indices[idx];
-    if(m_data_store->get_index_owner(index) != m_rank_in_model) {
+    if(m_data_store->get_index_owner(index) != get_comm()->get_rank_in_trainer()) {
       continue;
     }
     m_sample_list.close_samples_file_handle(index, true);
@@ -915,14 +912,14 @@ void data_reader_jag_conduit::load_list_of_samples(const std::string sample_list
 
   double tm2 = get_time();
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "Time to load sample list '" << sample_list_file << "': " << tm2 - tm1 << std::endl;
   }
 
   sample_schema_check(check_data);
 
   double tm3 = get_time();
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     if (!check_data) {
       std::cout << "Skip data checking" << std::endl;
     } else {
@@ -935,7 +932,7 @@ void data_reader_jag_conduit::load_list_of_samples(const std::string sample_list
   set_file_dir(m_sample_list.get_samples_dirname());
 
   double tm4 = get_time();
-  if(is_master()) {
+  if(get_comm()->am_world_master()) {
     std::cout << "Time to gather sample list '" << sample_list_file << "': " << tm4 - tm3 << std::endl;
   }
 }
@@ -950,7 +947,7 @@ void data_reader_jag_conduit::load_list_of_samples_from_archive(const std::strin
   iarchive(m_sample_list); // Read the data from the archive
   double tm2 = get_time();
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "Time to load sample list from archive: " << tm2 - tm1 << std::endl;
   }
 }
@@ -1425,16 +1422,6 @@ bool data_reader_jag_conduit::fetch(CPUMat& X, int data_id, conduit::Node& sampl
   return true;
 }
 
-
-
-
-
-
-
-
-
-
-
 bool data_reader_jag_conduit::fetch_datum(CPUMat& X, int data_id, int mb_idx) {
   int tid = m_io_thread_pool->get_local_thread_id();
   std::vector<size_t> sizes = get_linearized_data_sizes();
@@ -1465,7 +1452,7 @@ bool data_reader_jag_conduit::fetch_datum(CPUMat& X, int data_id, int mb_idx) {
 }
 
 bool data_reader_jag_conduit::fetch_response(CPUMat& X, int data_id, int mb_idx) {
-  const auto& c = static_cast<const sgd_execution_context&>(m_trainer->get_data_coordinator().get_execution_context());
+  const auto& c = static_cast<const sgd_execution_context&>(get_trainer().get_data_coordinator().get_execution_context());
   int tid = m_io_thread_pool->get_local_thread_id();
   std::vector<size_t> sizes = get_linearized_response_sizes();
   std::vector<CPUMat> X_v = create_datum_views(X, sizes, mb_idx);
diff --git a/src/data_readers/data_reader_merge_features.cpp b/src/data_readers/data_reader_merge_features.cpp
index b7f6558d84b..9cf151fff68 100644
--- a/src/data_readers/data_reader_merge_features.cpp
+++ b/src/data_readers/data_reader_merge_features.cpp
@@ -71,7 +71,7 @@ void data_reader_merge_features::load() {
     reader->set_comm(m_comm);
     reader->load();
     m_data_size += reader->get_linearized_data_size();
-    if (is_master()) {
+    if (get_comm()->am_world_master()) {
       std::cerr << "time to set up subsidiary reader: " << get_time() - tm1 << "\n";
     }
   }
diff --git a/src/data_readers/data_reader_mnist.cpp b/src/data_readers/data_reader_mnist.cpp
index 972f1f07a08..bf2e4d70c92 100644
--- a/src/data_readers/data_reader_mnist.cpp
+++ b/src/data_readers/data_reader_mnist.cpp
@@ -142,7 +142,7 @@ void load_mnist_data(const std::string imagepath, const std::string labelpath,
 }
 
 void mnist_reader::load() {
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "starting lbann::mnist_reader::load\n";
   }
   m_image_data.clear();
@@ -157,7 +157,7 @@ void mnist_reader::load() {
   const std::string imagepath = FileDir + "/" + ImageFile;
   const std::string labelpath = FileDir + "/" + LabelFile;
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "read labels!\n";
   }
 
@@ -174,7 +174,7 @@ void mnist_reader::load() {
   for (size_t n = 0; n < m_shuffled_indices.size(); n++) {
     m_shuffled_indices[n] = n;
   }
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "calling select_subset_of_data; m_shuffled_indices.size: " <<
       m_shuffled_indices.size() << std::endl;
   }
diff --git a/src/data_readers/data_reader_npz_ras_lipid.cpp b/src/data_readers/data_reader_npz_ras_lipid.cpp
index a578976b215..66a789c302f 100644
--- a/src/data_readers/data_reader_npz_ras_lipid.cpp
+++ b/src/data_readers/data_reader_npz_ras_lipid.cpp
@@ -78,7 +78,7 @@ void ras_lipid_conduit_data_reader::copy_members(const ras_lipid_conduit_data_re
 }
 
 void ras_lipid_conduit_data_reader::load() {
-  if(is_master()) {
+  if(get_comm()->am_world_master()) {
     std::cout << "starting load for role: " << get_role() << std::endl;
   }
 
@@ -109,7 +109,7 @@ void ras_lipid_conduit_data_reader::load() {
   else {
     double tm3 = get_time();
     get_samples_per_file();
-    if (is_master()) std::cout << "time to compute samples_per_file: " << get_time() - tm3 << std::endl;
+    if (get_comm()->am_world_master()) std::cout << "time to compute samples_per_file: " << get_time() - tm3 << std::endl;
   }
   // Optionally save the samples-per-file info to file
   if (arg_parser.get<std::string>("pilot2_save_file_sizes") != "") {
@@ -189,7 +189,7 @@ void ras_lipid_conduit_data_reader::load() {
 }
 
 void ras_lipid_conduit_data_reader::do_preload_data_store() {
-  if (is_master()) std::cout << "starting ras_lipid_conduit_data_reader::do_preload_data_store; num indices: " << utils::commify(m_shuffled_indices.size()) << " for role: " << get_role() << std::endl;
+  if (get_comm()->am_world_master()) std::cout << "starting ras_lipid_conduit_data_reader::do_preload_data_store; num indices: " << utils::commify(m_shuffled_indices.size()) << " for role: " << get_role() << std::endl;
 
 #if 0
 ==========================================================================
@@ -262,7 +262,7 @@ data types, from python+numpy:
           load_the_next_sample(work[k], starting_id+k, data);
 
           ++nn;
-          if (verbose && is_master() && nn % 1000 == 0) {
+          if (verbose && get_comm()->am_world_master() && nn % 1000 == 0) {
             std::cout << "estimated number of single-samples processed: "
                       << utils::commify(nn/1000*np) << "K" << std::endl;
           }
@@ -271,7 +271,7 @@ data types, from python+numpy:
         // First branch: seq_len = 1
         if (which == 1) {
           // debug block; will go away
-          if (testme && is_master()) {
+          if (testme && get_comm()->am_world_master()) {
             std::cout << "Taking first branch (seq_len == 1)" << std::endl;
             testme = false;
           }
@@ -284,7 +284,7 @@ data types, from python+numpy:
         //        branch for debugging
         else {
           // debug block; will go away
-          if (is_master() && m_seq_len == 1 && testme) {
+          if (get_comm()->am_world_master() && m_seq_len == 1 && testme) {
             std::cout << "Taking second branch (seq_len == 1)" << std::endl;
             testme = false;
           }
@@ -440,7 +440,7 @@ void ras_lipid_conduit_data_reader::get_samples_per_file() {
 }
 
 void ras_lipid_conduit_data_reader::write_file_sizes() {
-  if (! is_master()) {
+  if (! get_comm()->am_world_master()) {
     return;
   }
   std::string fn =
@@ -486,7 +486,7 @@ void ras_lipid_conduit_data_reader::read_normalization_data() {
   if (arg_parser.get<std::string>(NORMALIZATION) != "") {
     m_use_min_max = true;
     m_use_z_score = arg_parser.get<bool>(Z_SCORE);
-    if (is_master()) {
+    if (get_comm()->am_world_master()) {
       if (m_use_z_score) {
         std::cout << "Normalizing data using z-score" << std::endl;
       } else {
@@ -518,7 +518,7 @@ void ras_lipid_conduit_data_reader::read_normalization_data() {
     }
   }
   else {
-    if (is_master()) {
+    if (get_comm()->am_world_master()) {
       std::cout << "NOT Normalizing data!" << std::endl;
     }
   }
@@ -526,7 +526,7 @@ void ras_lipid_conduit_data_reader::read_normalization_data() {
 
 //user feedback
 void ras_lipid_conduit_data_reader::print_shapes_etc() {
-  if (!is_master()) {
+  if (!get_comm()->am_world_master()) {
     return;
   }
 
diff --git a/src/data_readers/data_reader_numpy_npz_conduit.cpp b/src/data_readers/data_reader_numpy_npz_conduit.cpp
index 279678cd1a0..8f2689d95f5 100644
--- a/src/data_readers/data_reader_numpy_npz_conduit.cpp
+++ b/src/data_readers/data_reader_numpy_npz_conduit.cpp
@@ -73,7 +73,7 @@ void numpy_npz_conduit_reader::copy_members(const numpy_npz_conduit_reader &rhs)
 }
 
 void numpy_npz_conduit_reader::load() {
-  if(is_master()) {
+  if(get_comm()->am_world_master()) {
     std::cout << "starting load" << std::endl;
   }
 
@@ -117,7 +117,7 @@ void numpy_npz_conduit_reader::load() {
 void numpy_npz_conduit_reader::do_preload_data_store() {
   double tm1 = get_time();
 
-  if (is_master()) std::cout << "Starting numpy_npz_conduit_reader::preload_data_store; num indices: " << m_shuffled_indices.size() << std::endl;
+  if (get_comm()->am_world_master()) std::cout << "Starting numpy_npz_conduit_reader::preload_data_store; num indices: " << m_shuffled_indices.size() << std::endl;
 
   size_t count = get_absolute_sample_count();
   double use_percent = get_use_percent();
@@ -133,7 +133,7 @@ void numpy_npz_conduit_reader::do_preload_data_store() {
 
   //threaded mode
   if (threaded) {
-    if (is_master()) {
+    if (get_comm()->am_world_master()) {
       std::cout << "mode: data_store_thread\n";
     }
     std::shared_ptr<thread_pool> io_thread_pool =
@@ -216,7 +216,7 @@ void numpy_npz_conduit_reader::do_preload_data_store() {
     // set of zero-based labels, so let's pretend like we do
     if (m_num_labels != 0) { //note: num_labels may be specified in the reader
       m_num_labels = trainer_max - trainer_min;
-      if(is_master()) {
+      if(get_comm()->am_world_master()) {
         std::cout << "num_labels: " << m_num_labels << "\n";
       }
     }
@@ -236,7 +236,7 @@ void numpy_npz_conduit_reader::do_preload_data_store() {
   #endif
 
   double tm2 = get_time();
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "time to preload: " << tm2 - tm1 << " for role: " << get_role() << "\n";
   }
 }
@@ -263,7 +263,7 @@ bool numpy_npz_conduit_reader::fetch_datum(Mat& X, int data_id, int mb_idx) {
     load_npz(m_filenames[data_id], data_id, node);
     //note: if testing, and test set is touched more than once, the following
     //      will through an exception TODO: relook later
-    const auto& c = static_cast<const execution_context&>(m_trainer->get_data_coordinator().get_execution_context());
+    const auto& c = static_cast<const execution_context&>(get_trainer().get_data_coordinator().get_execution_context());
     if (priming_data_store() || c.get_execution_mode() == execution_mode::testing) {
       m_data_store->set_conduit_node(data_id, node);
     }
@@ -375,7 +375,7 @@ void numpy_npz_conduit_reader::fill_in_metadata() {
   }
   in.close();
   m_num_samples = m_filenames.size();
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "num samples: " << m_num_samples << "\n";
   }
 
@@ -393,7 +393,7 @@ void numpy_npz_conduit_reader::fill_in_metadata() {
                                    m_data_dims.end(),
                                    (unsigned) 1,
                                    std::multiplies<unsigned>());
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "num features: " << m_num_features << "\n";
   }
 
@@ -404,7 +404,7 @@ void numpy_npz_conduit_reader::fill_in_metadata() {
                 std::to_string(word_size) + " not supported");
   }
   m_data_word_size = word_size;
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "data word size: " << m_data_word_size << "\n";
   }
 
@@ -423,7 +423,7 @@ void numpy_npz_conduit_reader::fill_in_metadata() {
     for (int k=1; k<n; k++) {
       m_num_response_features *= r_shape[k];
     }
-    if (is_master()) {
+    if (get_comm()->am_world_master()) {
       std::cout << "response word size: " << m_response_word_size << "\n";
       std::cout << "num response features: " << m_num_response_features<< "\n";
     }
diff --git a/src/data_readers/data_reader_python.cpp b/src/data_readers/data_reader_python.cpp
index 1b4788db99b..15b4783ae7f 100644
--- a/src/data_readers/data_reader_python.cpp
+++ b/src/data_readers/data_reader_python.cpp
@@ -187,7 +187,7 @@ void python_reader::setup(int num_io_threads,
   /// @todo Figure out more robust way to get max mini-batch size
   const El::Int sample_size = get_linearized_data_size();
   const El::Int mini_batch_size
-    = generic_data_reader::get_trainer().get_max_mini_batch_size();
+    = get_trainer().get_max_mini_batch_size();
   std::string datatype_typecode;
   switch (sizeof(DataType)) {
   case 4: datatype_typecode = "f"; break;
diff --git a/src/data_readers/data_reader_smiles.cpp b/src/data_readers/data_reader_smiles.cpp
index 85497bbda08..2564f54ac49 100644
--- a/src/data_readers/data_reader_smiles.cpp
+++ b/src/data_readers/data_reader_smiles.cpp
@@ -53,7 +53,7 @@ smiles_data_reader::smiles_data_reader(const smiles_data_reader& rhs)  : data_re
 
 smiles_data_reader::~smiles_data_reader() {
   if (m_missing_chars.size()) {
-    if (is_master()) {
+    if (get_comm()->am_world_master()) {
       std::cout << std::endl << "The following tokens were in SMILES strings, but were missing from the vocabulary: ";
       for (const auto t : m_missing_chars) {
         std::cout << t << " ";
@@ -108,7 +108,7 @@ void smiles_data_reader::copy_members(const smiles_data_reader &rhs) {
 }
 
 void smiles_data_reader::load() {
-  if(is_master()) {
+  if(get_comm()->am_world_master()) {
     std::cout << "starting load for role: " << get_role() << std::endl;
   }
 
@@ -139,7 +139,7 @@ void smiles_data_reader::load() {
 
   // Load the sample list(s)
   data_reader_sample_list::load();
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "time to load sample list: " << get_time() - tm1 << std::endl;
   }
 
@@ -159,7 +159,7 @@ void smiles_data_reader::load() {
 
 void smiles_data_reader::do_preload_data_store() {
   double tm1 = get_time();
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << "starting do_preload_data_store; num indices: "
               << utils::commify(m_shuffled_indices.size())
               << "; role: " << get_role() << std::endl;
@@ -260,7 +260,7 @@ void smiles_data_reader::do_preload_data_store() {
         for (const auto& [r_local, r_index] : samples_in_range) {
           (void) r_local; // silence compiler warning about unused variable.
           // BVE CHECK THIS
-          if (m_data_store->get_index_owner(r_index) != m_rank_in_model) {
+          if (m_data_store->get_index_owner(r_index) != get_comm()->get_rank_in_trainer()) {
             continue;
           }
 
@@ -280,7 +280,7 @@ void smiles_data_reader::do_preload_data_store() {
     in.close();
   }
 
-  if (is_master()) {
+  if (get_comm()->am_world_master()) {
     std::cout << " do_preload_data_store time: " << get_time() - tm1 << std::endl;
   }
 }
@@ -289,7 +289,7 @@ std::set<int> smiles_data_reader::get_my_indices() const {
   std::set<int> s;
   for (size_t j=0; j<m_shuffled_indices.size(); j++) {
     const int index = m_shuffled_indices[j];
-    if (m_data_store->get_index_owner(index) == m_rank_in_model) {
+    if (m_data_store->get_index_owner(index) == get_comm()->get_rank_in_trainer()) {
       s.insert(index);
     }
   }
@@ -327,7 +327,7 @@ bool smiles_data_reader::fetch_response(Mat& Y, int data_id, int mb_idx) {
 
 //user feedback
 void smiles_data_reader::print_statistics() const {
-  if (!is_master()) {
+  if (!get_comm()->am_world_master()) {
     return;
   }
 
@@ -467,13 +467,13 @@ void smiles_data_reader::load_offsets_and_lengths() {
   // trainer P_0 fills in offset_data vector, then bcasts
   std::vector<SampleData> offset_data;
 
-  if (m_comm->am_trainer_master()) {
+  if (get_comm()->am_trainer_master()) {
     read_offset_data(offset_data);
   }
   size_t n_samples = offset_data.size(); // only meaningful for root
-  m_comm->trainer_broadcast<size_t>(0, &n_samples, 1);
+  get_comm()->trainer_broadcast<size_t>(0, &n_samples, 1);
   offset_data.resize(n_samples); // not meaningful for root
-  m_comm->trainer_broadcast<SampleData>(0, offset_data.data(), offset_data.size());
+  get_comm()->trainer_broadcast<SampleData>(0, offset_data.data(), offset_data.size());
 
   // fill in the m_sample_offsets map
   for (size_t j=0; j<offset_data.size(); j++) {
diff --git a/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp b/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp
index c289d9f0248..0ccc3c5cc8d 100644
--- a/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp
+++ b/src/data_readers/unit_test/data_reader_HDF5_hrrl_public_api.cpp
@@ -164,7 +164,6 @@ TEST_CASE("hdf5 data reader data field fetch tests",
   // Manually tell the data reader to extract all of the data fields
   white_box_tester.construct_linearized_size_lookup_tables(*hdf5_dr, ref_node);
 
-  hdf5_dr->set_rank(0);
   hdf5_dr->set_comm(&comm);
 
   El::Int num_samples = 1;
diff --git a/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp b/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
index 28d24dfc9db..f7b6c0ffbf6 100644
--- a/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
+++ b/src/data_readers/unit_test/data_reader_synthetic_test_public_api.cpp
@@ -141,7 +141,6 @@ TEST_CASE("Synthetic data reader public API tests",
       LBANN_ERROR("Unknown data field");
     }
     dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
-    dr->set_rank(0);
     dr->set_comm(&comm);
     dr->set_num_parallel_readers(1);
     dr->load();
@@ -149,7 +148,7 @@ TEST_CASE("Synthetic data reader public API tests",
     dr->set_last_mini_batch_size(num_samples);
     dr->set_initial_position();
 
-    dr->fetch(local_input_buffers, indices_fetched);
+    dr->fetch(local_input_buffers, indices_fetched, num_samples);
 
     // Check all of the results that were fetched.  Ensure that the
     // data fields are accessed in the same order that they are in the map
@@ -235,7 +234,6 @@ TEST_CASE("Synthetic data reader public API tests - arbitrary field",
                                                              fields,
                                                              false);
     dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
-    dr->set_rank(0);
     dr->set_comm(&comm);
     dr->set_num_parallel_readers(1);
     dr->load();
@@ -243,7 +241,7 @@ TEST_CASE("Synthetic data reader public API tests - arbitrary field",
     dr->set_last_mini_batch_size(num_samples);
     dr->set_initial_position();
 
-    dr->fetch(local_input_buffers, indices_fetched);
+    dr->fetch(local_input_buffers, indices_fetched, num_samples);
 
     // Check all of the results that were fetched.  Ensure that the
     // data fields are accessed in the same order that they are in the map
@@ -276,7 +274,6 @@ TEST_CASE("Synthetic data reader public API tests - arbitrary field",
                                                              test_fields,
                                                              false);
     dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
-    dr->set_rank(0);
     dr->set_comm(&comm);
     dr->set_num_parallel_readers(1);
     dr->load();
@@ -284,7 +281,7 @@ TEST_CASE("Synthetic data reader public API tests - arbitrary field",
     dr->set_last_mini_batch_size(num_samples);
     dr->set_initial_position();
 
-    CHECK_THROWS(dr->fetch(local_input_buffers, indices_fetched));
+    CHECK_THROWS(dr->fetch(local_input_buffers, indices_fetched, num_samples));
 
     // All data buffers should be empty since it will have thrown an exception
     for (El::Int j = 0; j < num_samples; j++) {
@@ -312,7 +309,6 @@ TEST_CASE("Synthetic data reader public API tests - arbitrary field",
                                                              test_fields,
                                                              false);
     dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
-    dr->set_rank(0);
     dr->set_comm(&comm);
     dr->set_num_parallel_readers(1);
     dr->load();
@@ -320,7 +316,7 @@ TEST_CASE("Synthetic data reader public API tests - arbitrary field",
     dr->set_last_mini_batch_size(num_samples);
     dr->set_initial_position();
 
-    dr->fetch(test_local_input_buffers, indices_fetched);
+    dr->fetch(test_local_input_buffers, indices_fetched, num_samples);
 
     // Check all of the results that were fetched.  Ensure that the
     // data fields are accessed in the same order that they are in the map
@@ -353,7 +349,6 @@ TEST_CASE("Synthetic data reader public API tests - arbitrary field",
                                                              fields,
                                                              false);
     dr->setup(io_thread_pool->get_num_threads(), io_thread_pool.get());
-    dr->set_rank(0);
     dr->set_comm(&comm);
     dr->set_num_parallel_readers(1);
     dr->load();
@@ -365,7 +360,7 @@ TEST_CASE("Synthetic data reader public API tests - arbitrary field",
       dr->set_has_data_field(data_field, false);
     }
 
-    CHECK_THROWS(dr->fetch(local_input_buffers, indices_fetched));
+    CHECK_THROWS(dr->fetch(local_input_buffers, indices_fetched, num_samples));
 
     // All data buffers should be empty since it will have thrown an exception
     for (El::Int j = 0; j < num_samples; j++) {
diff --git a/src/proto/proto_common.cpp b/src/proto/proto_common.cpp
index 47e7780c18f..16addbd1ee8 100644
--- a/src/proto/proto_common.cpp
+++ b/src/proto/proto_common.cpp
@@ -464,8 +464,6 @@ void init_data_readers(
       }
     }
 
-    reader->set_master(master);
-
     reader->load();
 
     if (readme.role() == "train") {
diff --git a/src/trainers/trainer.cpp b/src/trainers/trainer.cpp
index 731f0a04042..e6825597ead 100644
--- a/src/trainers/trainer.cpp
+++ b/src/trainers/trainer.cpp
@@ -115,9 +115,6 @@ void trainer::setup(std::unique_ptr<thread_pool> io_thread_pool,
   // layer depends on having a properly initialized thread pool)
   m_io_thread_pool = std::move(io_thread_pool);
 
-  for (auto d : data_readers) {
-    d.second->set_trainer(this);
-  }
   m_data_coordinator.get()->setup(*m_io_thread_pool.get(),
                                   get_max_mini_batch_size(),
                                   data_readers);

From 4e108cd09ce7714007fda24ee51e56ef54d42b49 Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Wed, 20 Oct 2021 14:11:54 -0700
Subject: [PATCH 21/37] Debug NVSHMEM with distconv (#1976)

* Add option to initialize NVSHMEM when initializing LBANN

* Fix build error when including headers from NVSHMEM 2.2
---
 include/lbann/utils/nvshmem.hpp       |  1 +
 include/lbann/utils/options.hpp       |  4 +++-
 src/base.cpp                          | 26 ++++++++++++++++++++++++--
 src/data_readers/data_reader_HDF5.cpp |  2 +-
 src/utils/options.cpp                 | 12 +++++++++++-
 5 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/include/lbann/utils/nvshmem.hpp b/include/lbann/utils/nvshmem.hpp
index e3b5b1d422a..cd8d542eeee 100644
--- a/include/lbann/utils/nvshmem.hpp
+++ b/include/lbann/utils/nvshmem.hpp
@@ -32,6 +32,7 @@
 #include "lbann/utils/gpu/helpers.hpp"
 #include "lbann/utils/exception.hpp"
 #include <mpi.h>
+#define NVSHMEM_USE_NCCL
 #include <nvshmem.h>
 #include <nvshmemx.h>
 
diff --git a/include/lbann/utils/options.hpp b/include/lbann/utils/options.hpp
index 1b796c993e3..d0836c541b2 100644
--- a/include/lbann/utils/options.hpp
+++ b/include/lbann/utils/options.hpp
@@ -30,6 +30,8 @@ namespace lbann {
 #define VERBOSE "verbose"
 #define WRITE_SAMPLE_LIST "write_sample_list"
 #define USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP "Use Hydrogen's default memory mode for GPU buffers in forward prop"
+#define LBANN_OPTION_INIT_SHMEM "Initialize SHMEM when initializing LBANN"
+#define LBANN_OPTION_INIT_NVSHMEM "Initialize NVSHMEM when initializing LBANN"
 
 // Input options
 #define CKPT_DIR "ckpt_dir"
@@ -81,7 +83,7 @@ namespace lbann {
 #define LOAD_FULL_SAMPLE_LIST_ONCE "load_full_sample_list_once"
 #define MAKE_TEST_FAIL "make_test_fail"
 #define NODE_SIZES_VARY "node_sizes_vary"
-#define QUIET "quiet"
+#define LBANN_OPTION_QUIET "quiet"
 #define STACK_TRACE_TO_FILE "stack_trace_to_file"
 #define TEST_ENCODE "test_encode"
 #define WRITE_SAMPLE_LABEL_LIST "write_sample_label_list"
diff --git a/src/base.cpp b/src/base.cpp
index bf65c384a2d..6b635f7e120 100644
--- a/src/base.cpp
+++ b/src/base.cpp
@@ -40,8 +40,10 @@
 #endif // LBANN_HAS_SHMEM
 
 #include "lbann/comm_impl.hpp"
+#include "lbann/utils/argument_parser.hpp"
 #include "lbann/utils/exception.hpp"
 #include "lbann/utils/omp_diagnostics.hpp"
+#include "lbann/utils/options.hpp"
 #include "lbann/utils/stack_trace.hpp"
 
 #ifdef LBANN_HAS_DNN_LIB
@@ -79,6 +81,11 @@ MPI_Errhandler err_handle;
 
 std::unique_ptr<lbann_comm> initialize_lbann(El::mpi::Comm&& c)
 {
+
+  // Parse command-line arguments and environment variables
+  auto& arg_parser = global_argument_parser();
+  (void) arg_parser;
+
   // to ensure that all the necessary infrastructure in Hydrogen and
   // Aluminum has been setup.
   El::Initialize();
@@ -119,7 +126,7 @@ std::unique_ptr<lbann_comm> initialize_lbann(El::mpi::Comm&& c)
 
 #ifdef LBANN_HAS_SHMEM
   // Initialize SHMEM
-  {
+  if (arg_parser.get<bool>(LBANN_OPTION_INIT_SHMEM)) {
     int threading_level = SHMEM_THREAD_MULTIPLE;
     int status = shmem_init_thread(threading_level, &threading_level);
     if (status != 0 || threading_level != SHMEM_THREAD_MULTIPLE) {
@@ -127,6 +134,11 @@ std::unique_ptr<lbann_comm> initialize_lbann(El::mpi::Comm&& c)
     }
   }
 #endif // LBANN_HAS_SHMEM
+#ifdef LBANN_HAS_NVSHMEM
+  if (arg_parser.get<bool>(LBANN_OPTION_INIT_NVSHMEM)) {
+    nvshmem::initialize();
+  }
+#endif // LBANN_HAS_NVSHMEM
 
 #ifdef LBANN_HAS_DISTCONV
   dc::initialize(MPI_COMM_WORLD);
@@ -173,6 +185,11 @@ void finalize_lbann(lbann_comm* comm) {
 }
 
 world_comm_ptr initialize(int& argc, char**& argv) {
+
+  // Parse command-line arguments and environment variables
+  auto& arg_parser = global_argument_parser();
+  (void) arg_parser;
+
   // Initialize Elemental.
   El::Initialize(argc, argv);
 
@@ -213,7 +230,7 @@ world_comm_ptr initialize(int& argc, char**& argv) {
 
 #ifdef LBANN_HAS_SHMEM
   // Initialize SHMEM
-  {
+  if (arg_parser.get<bool>(LBANN_OPTION_INIT_SHMEM)) {
     int threading_level = SHMEM_THREAD_MULTIPLE;
     int status = shmem_init_thread(threading_level, &threading_level);
     if (status != 0 || threading_level != SHMEM_THREAD_MULTIPLE) {
@@ -221,6 +238,11 @@ world_comm_ptr initialize(int& argc, char**& argv) {
     }
   }
 #endif // LBANN_HAS_SHMEM
+#ifdef LBANN_HAS_NVSHMEM
+  if (arg_parser.get<bool>(LBANN_OPTION_INIT_NVSHMEM)) {
+    nvshmem::initialize();
+  }
+#endif // LBANN_HAS_NVSHMEM
 
 #ifdef LBANN_HAS_DISTCONV
   dc::initialize(MPI_COMM_WORLD);
diff --git a/src/data_readers/data_reader_HDF5.cpp b/src/data_readers/data_reader_HDF5.cpp
index 65472d29ab2..43438d32db6 100644
--- a/src/data_readers/data_reader_HDF5.cpp
+++ b/src/data_readers/data_reader_HDF5.cpp
@@ -264,7 +264,7 @@ void hdf5_data_reader::load()
               << "; num samples: " << m_shuffled_indices.size() << std::endl;
   }
 
-  if (!arg_parser.get<bool>(QUIET) && get_comm()->am_world_master()) {
+  if (!arg_parser.get<bool>(LBANN_OPTION_QUIET) && get_comm()->am_world_master()) {
     print_metadata();
   }
 }
diff --git a/src/utils/options.cpp b/src/utils/options.cpp
index 8d7abf1ccb6..6e2c7e89d3f 100644
--- a/src/utils/options.cpp
+++ b/src/utils/options.cpp
@@ -103,6 +103,16 @@ void construct_std_options()
     "forward prop (namely activations and weights). This will "
     "typically use a GPU memory pool, which uses more memory than "
     "directly allocating GPU memory.");
+  arg_parser.add_flag(
+    LBANN_OPTION_INIT_SHMEM,
+    {"--init_shmem"},
+    utils::ENV("LBANN_INIT_SHMEM"),
+    "[STD] Initialize SHMEM when initializing LBANN");
+  arg_parser.add_flag(
+    LBANN_OPTION_INIT_NVSHMEM,
+    {"--init_nvshmem"},
+    utils::ENV("LBANN_INIT_NVSHMEM"),
+    "[STD] Initialize NVSHMEM when initializing LBANN");
 
   // Input options
   arg_parser.add_option(
@@ -292,7 +302,7 @@ void construct_datareader_options()
   arg_parser.add_flag(NODE_SIZES_VARY,
                       {"--node_sizes_vary"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(QUIET, {"--quiet"}, "[DATAREADER] TODO");
+  arg_parser.add_flag(LBANN_OPTION_QUIET, {"--quiet"}, "[DATAREADER] TODO");
   arg_parser.add_flag(STACK_TRACE_TO_FILE,
                       {"--stack_trace_to_file"},
                       "[DATAREADER] TODO");

From 7ce4e851bac37e1066fa8aac4999a3dd2a8eb8e8 Mon Sep 17 00:00:00 2001
From: Sam Ade Jacobs <jacobs32@llnl.gov>
Date: Thu, 21 Oct 2021 11:28:28 -0700
Subject: [PATCH 22/37] Integration test for PROBIESNet (#1989)

* Integration test for PROBIESNet

* Tweak the target values

* Point test to model and data in app dir

* Remove hrrl model dir

* Remove hrrl data dir

Co-authored-by: Brian C. Van Essen <vanessen1@llnl.gov>
---
 .../test_integration_probiesnet.py            | 225 ++++++++++++++++++
 1 file changed, 225 insertions(+)
 create mode 100644 bamboo/integration_tests/test_integration_probiesnet.py

diff --git a/bamboo/integration_tests/test_integration_probiesnet.py b/bamboo/integration_tests/test_integration_probiesnet.py
new file mode 100644
index 00000000000..9b15ae606ec
--- /dev/null
+++ b/bamboo/integration_tests/test_integration_probiesnet.py
@@ -0,0 +1,225 @@
+import functools
+import operator
+import os
+import os.path
+import re
+import sys
+import numpy as np
+import google.protobuf.text_format
+import pytest
+
+# Local files
+current_file = os.path.realpath(__file__)
+lbann_dir = os.path.dirname(os.path.dirname(os.path.dirname(current_file)))
+app_path = os.path.join(lbann_dir, 'applications', 'physics','HRRL')
+sys.path.append(app_path)
+import tools
+
+# ==============================================
+# Options
+# ==============================================
+
+# Training options
+num_epochs = 10
+mini_batch_size = 32
+num_nodes = 1
+
+# Expected Pearson correlation ranges (exclusive bounds)
+expected_train_pc_range = (0.89, 0.92)
+expected_test_pc_range = (0.90, 0.925)
+
+# Average mini-batch time (in sec) for each LC system
+# Note that run times are with LBANN_DETERMINISTIC set
+# Measured averages must fall within 25% of these values
+expected_mini_batch_times = {
+    'lassen':   0.0051,
+    'pascal':   0.0146,
+}
+# ==============================================
+# Setup LBANN experiment
+# ==============================================
+def list2str(l):  # join an iterable of strings with single spaces
+    return ' '.join(l)
+
+def make_data_reader(lbann):
+    """Make Protobuf message for HRRL data reader.
+
+    Parsed from data/probies_v2.prototext in the HRRL app directory."""
+    import lbann.contrib.lc.paths  # NOTE(review): not used below - confirm it is needed
+
+    # Load data readers from prototext
+    protobuf_file = os.path.join(app_path,'data',
+                                 'probies_v2.prototext')
+
+    message = lbann.lbann_pb2.LbannPB()
+    with open(protobuf_file, 'r') as f:
+        google.protobuf.text_format.Merge(f.read(), message)
+    message = message.data_reader
+
+    # The prototext is returned as-is; no paths are overridden here
+    return message
+
+def setup_experiment(lbann):
+    """Construct LBANN experiment, skipping on unsupported systems.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    Returns:
+        tuple: trainer, model, data reader message, and optimizer
+    """
+    if tools.system(lbann) not in ('lassen', 'pascal'):
+        message = f'{os.path.basename(__file__)} is only supported on lassen and pascal systems'
+        print('Skip - ' + message)
+        pytest.skip(message)
+
+    trainer = lbann.Trainer(mini_batch_size=mini_batch_size)
+    model = construct_model(lbann)
+    data_reader = make_data_reader(lbann)
+    opt = lbann.Adam(learn_rate=0.0002,beta1=0.9,beta2=0.99,eps=1e-8)
+    return trainer, model, data_reader, opt
+
+def construct_model(lbann):
+    """Construct LBANN model.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+    import models.probiesNet as model
+
+    images = lbann.Input(data_field='samples')
+    responses = lbann.Input(data_field='responses')
+
+    num_labels = 5
+
+    images = lbann.Reshape(images, dims='1 300 300')
+
+    # Network prediction and mean-squared-error objective
+    pred = model.PROBIESNet(num_labels)(images)
+
+    mse = lbann.MeanSquaredError([responses, pred])
+
+    # Pearson Correlation
+    # rho(x,y) = covariance(x,y) / sqrt( variance(x) * variance(y) )
+    pearson_r_cov = lbann.Covariance([pred, responses],
+                                     name="pearson_r_cov")
+
+    pearson_r_var1 = lbann.Variance(responses,
+                                    name="pearson_r_var1")
+
+    pearson_r_var2 = lbann.Variance(pred,
+                                    name="pearson_r_var2")
+
+    # Denominator: sqrt( variance(x) * variance(y) ), plus a small epsilon
+    pearson_r_mult = lbann.Multiply([pearson_r_var1, pearson_r_var2],
+                                    name="pearson_r_mult")
+
+    pearson_r_sqrt = lbann.Sqrt(pearson_r_mult,
+                                name="pearson_r_sqrt")
+
+    eps = lbann.Constant(value=1e-07,hint_layer=pearson_r_sqrt)
+    pearson_r = lbann.Divide([pearson_r_cov, lbann.Add(pearson_r_sqrt,eps)],
+                             name="pearson_r")
+
+    # Both quantities are reported as metrics; only MSE is the objective
+    metrics = [lbann.Metric(mse, name='mse')]
+    metrics.append(lbann.Metric(pearson_r, name='pearson_r'))
+
+    callbacks = [lbann.CallbackPrint(),
+                 lbann.CallbackTimer()]
+
+    # Gather the layer graph connected to these two layers
+    layers = list(lbann.traverse_layer_graph([images, responses]))
+    return lbann.Model(num_epochs,
+                       layers=layers,
+                       metrics=metrics,
+                       objective_function=mse,
+                       callbacks=callbacks)
+
+
+# ==============================================
+# Setup PyTest
+# ==============================================
+
+def augment_test_func(test_func):
+    """Augment test function to parse log files.
+
+    `tools.create_tests` creates functions that run an LBANN
+    experiment. This function creates augmented functions that parse
+    the log files after LBANN finishes running, e.g. to check metrics
+    or runtimes.
+
+    Note: The naive approach is to define the augmented test functions
+    in a loop. However, Python closures are late binding. In other
+    words, the function would be overwritten every time we define it.
+    We get around this overwriting problem by defining the augmented
+    function in the local scope of another function.
+
+    Args:
+        test_func (function): Test function created by
+            `tools.create_tests`.
+
+    Returns:
+        function: Test that can interact with PyTest.
+    """
+    test_name = test_func.__name__
+
+    # Define test function
+    def func(cluster, dirname,weekly):
+        if not weekly:
+            pytest.skip('This app runs {} with weekly builds only'.format(test_name))
+
+        # Run LBANN experiment
+        experiment_output = test_func(cluster, dirname)
+
+        # Parse LBANN log file (the last matching line wins for each metric)
+        train_pc = test_pc = None
+        mini_batch_times = []
+        with open(experiment_output['stdout_log_file']) as f:
+            for line in f:
+                match = re.search('training epoch [0-9]+ pearson_r : ([0-9.]+)', line)
+                if match:
+                    train_pc = float(match.group(1))
+                match = re.search('test pearson_r : ([0-9.]+)', line)
+                if match:
+                    test_pc = float(match.group(1))
+                match = re.search('training epoch [0-9]+ mini-batch time statistics : ([0-9.]+)s mean', line)
+                if match:
+                    mini_batch_times.append(float(match.group(1)))
+
+        # Check if training Pearson correlation is within expected range
+        assert train_pc is not None, 'no training pearson_r found in log'
+        assert (expected_train_pc_range[0]
+                < train_pc
+                < expected_train_pc_range[1]), \
+                'train pearson correlation is outside expected range'
+
+        # Check if testing Pearson correlation is within expected range
+        assert test_pc is not None, 'no test pearson_r found in log'
+        assert (expected_test_pc_range[0]
+                < test_pc
+                < expected_test_pc_range[1]), \
+                'test pearson correlation is outside expected range'
+
+        # Check if mini-batch time is within expected range
+        # Note: Skip first epoch since its runtime is usually an outlier
+        mini_batch_times = mini_batch_times[1:]
+        assert mini_batch_times, 'too few epochs to compute mini-batch time'
+        mini_batch_time = sum(mini_batch_times) / len(mini_batch_times)
+        assert (0.75 * expected_mini_batch_times[cluster]
+                < mini_batch_time
+                < 1.25 * expected_mini_batch_times[cluster]), \
+                'average mini-batch time is outside expected range'
+
+    # Return test function from factory function
+    func.__name__ = test_name
+    return func
+
+m_lbann_args = "--use_data_store --preload_data_store"  # passed via lbann_args
+# Create test functions that can interact with PyTest
+for _test_func in tools.create_tests(setup_experiment,
+                                     __file__,
+                                     lbann_args=[m_lbann_args],
+                                     nodes=num_nodes):
+    globals()[_test_func.__name__] = augment_test_func(_test_func)

From 603a26fc532db4d4c50dcf0f07f9f598666387b5 Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Thu, 28 Oct 2021 10:38:55 -0700
Subject: [PATCH 23/37] Use same device for an operator layer and its operators
 (#1993)

---
 python/lbann/core/operator_layers.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/python/lbann/core/operator_layers.py b/python/lbann/core/operator_layers.py
index 0368c4481ec..238e97559bd 100644
--- a/python/lbann/core/operator_layers.py
+++ b/python/lbann/core/operator_layers.py
@@ -41,11 +41,26 @@ def __init__(self, *args, **kwargs):
 
     def export_proto(self):
         """Construct and return a protobuf message."""
-        if (self.datatype is None):
-            self.datatype = 0 # Use the default value.
+
+        # Use default datatype if not specified
+        if self.datatype is None:
+            self.datatype = 0
+
+        # Convert device string to enum
+        device = lbann.DeviceAllocation.DEFAULT_DEVICE
+        if isinstance(self.device, str):
+            if self.device.lower() == 'cpu':
+                device = lbann.DeviceAllocation.CPU
+            elif self.device.lower() == 'gpu':
+                device = lbann.DeviceAllocation.GPU
+
+        # Configure operators to match layer
         for o in self.ops:
             o.input_type = self.datatype
             o.output_type = self.datatype
+            o.device_allocation = device
+
+        # Generate Protobuf message
         return OperatorLayer.export_proto(self)
 
     # Return operator layer class

From 7aa5ad3df93c6c29ffb63f869e2a7a195830c301 Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Thu, 28 Oct 2021 13:17:26 -0700
Subject: [PATCH 24/37] NumPy weight initializer (#1956)

* Weight initializer from NumPy file

* Expand documentation of weight initializers

* Update input layer in NumPy initializer test
---
 bamboo/unit_tests/.gitignore                  |   1 +
 .../test_unit_weights_numpy_initializer.py    | 257 ++++++++++++++++++
 include/lbann/weights/initializer.hpp         |  50 +++-
 src/proto/factories/weights_factory.cpp       |   2 +
 src/proto/weights.proto                       |  37 ++-
 src/weights/initializer.cpp                   | 112 ++++++++
 6 files changed, 450 insertions(+), 9 deletions(-)
 create mode 100644 bamboo/unit_tests/test_unit_weights_numpy_initializer.py

diff --git a/bamboo/unit_tests/.gitignore b/bamboo/unit_tests/.gitignore
index 0cc4de789bf..1ff76fcf43c 100644
--- a/bamboo/unit_tests/.gitignore
+++ b/bamboo/unit_tests/.gitignore
@@ -1,2 +1,3 @@
 .cache
 *.prototext
+temp
\ No newline at end of file
diff --git a/bamboo/unit_tests/test_unit_weights_numpy_initializer.py b/bamboo/unit_tests/test_unit_weights_numpy_initializer.py
new file mode 100644
index 00000000000..ba6b0728f61
--- /dev/null
+++ b/bamboo/unit_tests/test_unit_weights_numpy_initializer.py
@@ -0,0 +1,257 @@
+import functools
+import operator
+import os
+import os.path
+import sys
+import numpy as np
+
+# Bamboo utilities
+current_file = os.path.realpath(__file__)
+current_dir = os.path.dirname(current_file)
+weights_dir = os.path.join(current_dir, 'temp')
+sys.path.insert(0, os.path.join(os.path.dirname(current_dir), 'common_python'))
+import tools
+os.makedirs(weights_dir, exist_ok=True)
+
+# ==============================================
+# Objects for Python data reader
+# ==============================================
+# Note: The Python data reader imports this file as a module and calls
+# the functions below to ingest data.
+
+# Data
+np.random.seed(20210826)
+_num_samples = 11
+_sample_dims = (4,3,2)
+_sample_size = functools.reduce(operator.mul, _sample_dims)
+_samples = np.random.normal(size=(_num_samples,_sample_size)).astype(np.float32)
+
+# Sample access functions
+def get_sample(index):  # row `index` of _samples: one flattened float32 sample
+    return _samples[index,:]
+def num_samples():  # size of the synthetic data set
+    return _num_samples
+def sample_dims():  # samples are exposed flattened, hence a 1-D shape
+    return (_sample_size,)
+
+# ==============================================
+# Setup LBANN experiment
+# ==============================================
+
+def setup_experiment(lbann):
+    """Build the trainer, model, data reader, and optimizer for this test.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+    # The mini-batch size equals the number of samples
+    experiment = (lbann.Trainer(num_samples()),
+                  construct_model(lbann),
+                  construct_data_reader(lbann),
+                  lbann.NoOptimizer())
+    return experiment
+
+def construct_model(lbann):
+    """Construct LBANN model.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+
+    # Input data
+    x = lbann.Input(data_field='samples')
+    x_lbann = x
+
+    # Objects for LBANN model
+    metrics = []
+    callbacks = []
+
+    # ------------------------------------------
+    # Data-parallel weights layer
+    # ------------------------------------------
+    # Note: Weights are stored in one column of (STAR,STAR)
+    # distributed matrix
+
+    # Weights
+    weights_values = np.random.normal(size=_sample_dims).astype(np.float32)
+    weights_file = os.path.join(weights_dir, 'dataparallel_weights.npy')
+    np.save(weights_file, weights_values)
+
+    # LBANN implementation
+    x = lbann.Reshape(x_lbann, dims=tools.str_list(_sample_dims))
+    weights = lbann.Weights(
+        initializer=lbann.NumpyInitializer(file=weights_file),
+    )
+    weights = lbann.WeightsLayer(
+        weights=weights,
+        dims=tools.str_list(_sample_dims),
+    )
+    y = lbann.Multiply(x, weights)
+    z = lbann.L2Norm2(y)
+    metrics.append(lbann.Metric(z, name='data-parallel weights layer'))
+
+    # NumPy implementation
+    vals = []
+    for i in range(num_samples()):
+        x = get_sample(i).reshape(_sample_dims).astype(np.float64)
+        y = x * weights_values
+        z = tools.numpy_l2norm2(y)
+        vals.append(z)
+    val = np.mean(vals)
+    tol = 8 * val * np.finfo(np.float32).eps
+    callbacks.append(lbann.CallbackCheckMetric(
+        metric=metrics[-1].name,
+        lower_bound=val-tol,
+        upper_bound=val+tol,
+        error_on_failure=True,
+        execution_modes='test'))
+
+    # ------------------------------------------
+    # Data-parallel FC layer
+    # ------------------------------------------
+    # Note: Weights are stored in (STAR,STAR) distributed matrix
+
+    # Weights
+    output_size = 7
+    linearity = np.random.normal(size=(output_size, _sample_size)).astype(np.float32)
+    linearity = linearity.astype(np.float64)
+    bias = np.random.normal(size=output_size).astype(np.float32)
+    linearity_file = os.path.join(weights_dir, 'dataparallel_fc_linearity.npy')
+    bias_file = os.path.join(weights_dir, 'dataparallel_fc_bias.npy')
+    np.save(linearity_file, linearity)
+    np.save(bias_file, bias)
+
+    # LBANN implementation
+    x = x_lbann
+    linearity_weights \
+        = lbann.Weights(initializer=lbann.NumpyInitializer(file=linearity_file))
+    bias_weights \
+        = lbann.Weights(initializer=lbann.NumpyInitializer(file=bias_file))
+    y = lbann.FullyConnected(
+        x,
+        weights=(linearity_weights, bias_weights),
+        data_layout='data_parallel',
+        num_neurons=output_size,
+        has_bias=True,
+        transpose=False)
+    z = lbann.L2Norm2(y)
+    metrics.append(lbann.Metric(z, name='data-parallel FC layer'))
+
+    # NumPy implementation
+    vals = []
+    for i in range(num_samples()):
+        x = get_sample(i).astype(np.float64)
+        y = np.matmul(linearity, x) + bias
+        z = tools.numpy_l2norm2(y)
+        vals.append(z)
+    val = np.mean(vals)
+    tol = 8 * val * np.finfo(np.float32).eps
+    callbacks.append(lbann.CallbackCheckMetric(
+        metric=metrics[-1].name,
+        lower_bound=val-tol,
+        upper_bound=val+tol,
+        error_on_failure=True,
+        execution_modes='test'))
+
+    # ------------------------------------------
+    # Model-parallel FC layer
+    # ------------------------------------------
+    # Note: Weights are stored in (MC,MR) distributed matrix
+
+    # Weights
+    output_size = 9
+    linearity = np.random.normal(size=(output_size, _sample_size)).astype(np.float32)
+    bias = np.random.normal(size=output_size).astype(np.float32)
+    bias = bias.astype(np.float64)
+    linearity_file = os.path.join(weights_dir, 'modelparallel_fc_linearity.npy')
+    bias_file = os.path.join(weights_dir, 'modelparallel_fc_bias.npy')
+    np.save(linearity_file, linearity)
+    np.save(bias_file, bias)
+
+    # LBANN implementation
+    x = x_lbann
+    linearity_weights \
+        = lbann.Weights(initializer=lbann.NumpyInitializer(file=linearity_file))
+    bias_weights \
+        = lbann.Weights(initializer=lbann.NumpyInitializer(file=bias_file))
+    y = lbann.FullyConnected(
+        x,
+        weights=(linearity_weights, bias_weights),
+        data_layout='model_parallel',
+        num_neurons=output_size,
+        has_bias=True,
+        transpose=False)
+    z = lbann.L2Norm2(y)
+    metrics.append(lbann.Metric(z, name='model-parallel FC layer'))
+
+    # NumPy implementation
+    vals = []
+    for i in range(num_samples()):
+        x = get_sample(i).astype(np.float64)
+        y = np.matmul(linearity, x) + bias
+        z = tools.numpy_l2norm2(y)
+        vals.append(z)
+    val = np.mean(vals)
+    tol = 8 * val * np.finfo(np.float32).eps
+    callbacks.append(lbann.CallbackCheckMetric(
+        metric=metrics[-1].name,
+        lower_bound=val-tol,
+        upper_bound=val+tol,
+        error_on_failure=True,
+        execution_modes='test'))
+
+    # ------------------------------------------
+    # Construct model
+    # ------------------------------------------
+
+    num_epochs = 0
+    return lbann.Model(num_epochs,
+                       layers=lbann.traverse_layer_graph(x_lbann),
+                       metrics=metrics,
+                       callbacks=callbacks)
+
+def construct_data_reader(lbann):
+    """Build the Protobuf data reader message for this test.
+
+    The message describes Python data readers. The Python data
+    reader will import the current Python file to access the
+    sample access functions.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    Returns:
+        The ``lbann.reader_pb2.DataReader`` message, holding one
+        reader for 'train' followed by one for 'test'.
+
+    """
+
+    # Note: The training data reader should be removed when
+    # https://github.com/LLNL/lbann/issues/1098 is resolved.
+    reader_message = lbann.reader_pb2.DataReader()
+
+    # Both readers share the same sample access functions and differ
+    # only in the final argument.
+    for mode in ('train', 'test'):
+        python_reader = tools.create_python_data_reader(
+            lbann,
+            current_file,
+            'get_sample',
+            'num_samples',
+            'sample_dims',
+            mode,
+        )
+        # Append in iteration order: 'train' first, then 'test'.
+        reader_message.reader.extend([python_reader])
+
+    return reader_message
+
+# ==============================================
+# Setup PyTest
+# ==============================================
+
+# Expose each generated test function as a module-level name for PyTest
+for _test_func in tools.create_tests(setup_experiment, __file__):
+    globals()[_test_func.__name__] = _test_func
diff --git a/include/lbann/weights/initializer.hpp b/include/lbann/weights/initializer.hpp
index a5b1b38119d..ed9d4468368 100644
--- a/include/lbann/weights/initializer.hpp
+++ b/include/lbann/weights/initializer.hpp
@@ -63,6 +63,8 @@ class data_type_weights_initializer
   /** @brief The tensor type expected in this object. */
   using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;
 
+  ///@}
+
 public:
   data_type_weights_initializer() = default;
   virtual ~data_type_weights_initializer() = default;
@@ -75,7 +77,7 @@ class data_type_weights_initializer
 
 };
 
-/** @brief Fill weights with a constant value. */
+/** @brief Fill weights with a single constant value. */
 template <typename TensorDataType>
 class constant_initializer
   : public Cloneable<constant_initializer<TensorDataType>,
@@ -87,6 +89,8 @@ class constant_initializer
   /** @brief The tensor type expected in this object. */
   using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;
 
+  ///@}
+
 public:
   constant_initializer(TensorDataType value)
     : m_value(value)
@@ -106,6 +110,9 @@ class constant_initializer
  *
  *  The number of weight entries must exactly match the number of
  *  provided values.
+ *
+ *  @note Most weights are stored in row-major order. However, the
+ *  fully-connected layer's linearity weights are column-major.
  */
 template <typename TensorDataType>
 class value_initializer
@@ -118,6 +125,8 @@ class value_initializer
   /** @brief The tensor type expected in this object. */
   using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;
 
+  ///@}
+
 public:
   value_initializer(std::vector<TensorDataType> values)
     : m_values{std::move(values)}
@@ -132,6 +141,38 @@ class value_initializer
 
 };
 
+/** @brief Fill weights with values from a NumPy file.
+ *
+ *  Expects a .npy file with float32 or float64 values in C-style,
+ *  row-major order. The file is loaded when @c fill is called.
+ */
+template <typename TensorDataType>
+class numpy_initializer
+  : public Cloneable<numpy_initializer<TensorDataType>,
+                     data_type_weights_initializer<TensorDataType>> {
+public:
+  /** @name Public Types */
+  ///@{
+
+  /** @brief The tensor type expected in this object. */
+  using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;
+
+  ///@}
+
+public:
+  numpy_initializer(std::string file)
+    : m_file{std::move(file)}
+  {}
+  std::string get_type() const override { return "NumPy"; }
+  void fill(AbsDistMatrixType& matrix) override;
+
+private:
+
+  /** @brief Path to the .npy file that holds the weight values. */
+  std::string m_file;
+
+};
+
 /** @brief Draw weights values from a uniform random distribution. */
 template <typename TensorDataType>
 class uniform_initializer
@@ -144,6 +185,8 @@ class uniform_initializer
   /** @brief The tensor type expected in this object. */
   using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;
 
+  ///@}
+
  public:
   uniform_initializer(TensorDataType min = El::To<TensorDataType>(0),
                       TensorDataType max = El::To<TensorDataType>(1))
@@ -204,6 +247,10 @@ template <typename TensorDataType>
 std::unique_ptr<weights_initializer>
 build_value_initializer_from_pbuf(google::protobuf::Message const& msg);
 
+template <typename TensorDataType>
+std::unique_ptr<weights_initializer>
+build_numpy_initializer_from_pbuf(google::protobuf::Message const& msg);
+
 template <typename TensorDataType>
 std::unique_ptr<weights_initializer>
 build_uniform_initializer_from_pbuf(google::protobuf::Message const& msg);
@@ -217,6 +264,7 @@ build_normal_initializer_from_pbuf(google::protobuf::Message const& msg);
   extern template class data_type_weights_initializer<T>; \
   extern template class constant_initializer<T>;          \
   extern template class value_initializer<T>;             \
+  extern template class numpy_initializer<T>;             \
   extern template class uniform_initializer<T>;           \
   extern template class normal_initializer<T>
 
diff --git a/src/proto/factories/weights_factory.cpp b/src/proto/factories/weights_factory.cpp
index f094fd7973b..a18232d5ff3 100644
--- a/src/proto/factories/weights_factory.cpp
+++ b/src/proto/factories/weights_factory.cpp
@@ -71,6 +71,8 @@ class factory_manager
                               build_constant_initializer_from_pbuf<T>);
     factory_.register_builder("ValueInitializer",
                               build_value_initializer_from_pbuf<T>);
+    factory_.register_builder("NumpyInitializer",
+                              build_numpy_initializer_from_pbuf<T>);
     factory_.register_builder("UniformInitializer",
                               build_uniform_initializer_from_pbuf<T>);
     factory_.register_builder("NormalInitializer",
diff --git a/src/proto/weights.proto b/src/proto/weights.proto
index 21bd6309a70..33f31a4ca02 100644
--- a/src/proto/weights.proto
+++ b/src/proto/weights.proto
@@ -42,23 +42,44 @@ message Initializer {
   oneof initializer_type {
     ConstantInitializer constant_initializer = 20;
     ValueInitializer value_initializer = 21;
-    UniformInitializer uniform_initializer = 22;
-    NormalInitializer normal_initializer = 23;
-    GlorotNormalInitializer glorot_normal_initializer = 24;
-    GlorotUniformInitializer glorot_uniform_initializer = 25;
-    HeNormalInitializer he_normal_initializer = 26;
-    HeUniformInitializer he_uniform_initializer = 27;
-    LeCunNormalInitializer lecun_normal_initializer = 28;
-    LeCunUniformInitializer lecun_uniform_initializer = 29;
+    NumpyInitializer numpy_initializer = 22;
+    UniformInitializer uniform_initializer = 23;
+    NormalInitializer normal_initializer = 24;
+    GlorotNormalInitializer glorot_normal_initializer = 25;
+    GlorotUniformInitializer glorot_uniform_initializer = 26;
+    HeNormalInitializer he_normal_initializer = 27;
+    HeUniformInitializer he_uniform_initializer = 28;
+    LeCunNormalInitializer lecun_normal_initializer = 29;
+    LeCunUniformInitializer lecun_uniform_initializer = 30;
   }
 
   // Weight initializers
+
+  /** @brief Fill weights with a single constant value. */
   message ConstantInitializer {
     double value = 1;
   }
+  /** @brief Fill weights with values from a list.
+   *
+   *  The number of weight entries must exactly match the number of
+   *  provided values.
+   *
+   *  @note Most weights are stored in row-major order. However, the
+   *  fully-connected layer's linearity weights are column-major.
+   */
   message ValueInitializer {
+    /// Space-separated list of values
     string values = 1;
   }
+  /** @brief Fill weights with values from a NumPy file.
+   *
+   *  Expects a .npy file with float32 or float64 values in C-style,
+   *  row-major order.
+   */
+  message NumpyInitializer {
+    /// Path to the .npy file to load
+    string file = 1;
+  }
   message UniformInitializer {
     double min = 1;
     double max = 2;
diff --git a/src/weights/initializer.cpp b/src/weights/initializer.cpp
index a5f9840ae90..11b5c665ab7 100644
--- a/src/weights/initializer.cpp
+++ b/src/weights/initializer.cpp
@@ -33,6 +33,9 @@
 #include "lbann/utils/random.hpp"
 
 #include <weights.pb.h>
+#ifdef LBANN_HAS_CNPY
+#include <cnpy.h>
+#endif // LBANN_HAS_CNPY
 
 #include <sstream>
 
@@ -101,6 +104,104 @@ void value_initializer<TensorDataType>::fill(AbsDistMatrixType& matrix) {
 
 }
 
+/** @brief Fill a weights matrix with the values stored in the .npy file.
+ *  A single-column matrix takes the values as-is; any other matrix needs
+ *  a 2-D array of the same height and width. */
+template <typename TensorDataType>
+void numpy_initializer<TensorDataType>::fill(AbsDistMatrixType& matrix) {
+#ifndef LBANN_HAS_CNPY
+  LBANN_ERROR("CNPY not detected");
+#else
+  // Load NumPy file
+  cnpy::NpyArray array = cnpy::npy_load(m_file);
+  const size_t num_values = array.num_bytes() / array.word_size;
+  if (matrix.Height() * matrix.Width() != (El::Int) num_values) {
+    LBANN_ERROR(
+      "NumPy weight initializer attempted to initialize a ",
+      matrix.Height()," x ",matrix.Width()," weights matrix, "
+      "but ",m_file," contains ",num_values," values");
+  }
+  if (array.fortran_order) {
+    LBANN_ERROR(
+      "NumPy weight initializer does not support Fortran order ",
+      "(error while loading ",m_file,")");
+  }
+
+  // Extract weight values from NumPy array
+  // Note: Consider viewing instead of copying when the array is
+  // already in the right datatype.
+  // NOTE(review): only word_size is checked here; integer data of the
+  std::vector<TensorDataType> values(num_values);
+  switch (array.word_size) {
+  case 4: {
+    const auto* src = array.data<float>();
+    auto* dst = values.data();
+    LBANN_OMP_PARALLEL_FOR
+    for (size_t i=0; i<num_values; ++i) {
+      dst[i] = src[i];
+    }
+    break;
+  }
+  case 8: {
+    const auto* src = array.data<double>();
+    auto* dst = values.data();
+    LBANN_OMP_PARALLEL_FOR
+    for (size_t i=0; i<num_values; ++i) {
+      dst[i] = src[i];
+    }
+    break;
+  }
+  default:
+    LBANN_ERROR(
+      "NumPy weight initializer only supports float32 and float64 data ",
+      "(error while loading ",m_file,")");
+  }
+
+  // Construct CPU matrix from weight values
+  using CPUMatType = El::DistMatrix<TensorDataType, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>;
+  CPUMatType cpu_matrix(matrix.Grid(), matrix.Root());
+  if (matrix.Width() == 1) {
+    cpu_matrix.LockedAttach(
+      matrix.Height(),
+      matrix.Width(),
+      matrix.Grid(),
+      matrix.ColAlign(),
+      matrix.RowAlign(),
+      values.data(),
+      matrix.Height(),
+      matrix.Root());
+  }
+  else {
+    // Weights in fully-connected layer are in Fortran-order. Need to
+    // transpose NumPy array before copying into the Hydrogen matrix
+    if (array.shape.size() != 2) {
+      LBANN_ERROR(
+        "NumPy weight initializer attempted to initialize a ",
+        matrix.Height()," x ",matrix.Width()," weights matrix, "
+        "but ",m_file," contains a ",array.shape.size(),"-D array");
+    }
+    if ((El::Int) array.shape[0] != matrix.Height()
+        || (El::Int) array.shape[1] != matrix.Width()) {
+      LBANN_ERROR(
+        "NumPy weight initializer attempted to initialize a ",
+        matrix.Height()," x ",matrix.Width()," weights matrix, "
+        "but ",m_file," contains a ",
+        array.shape[0]," x ",array.shape[1], " array");
+    }
+    El::Matrix<TensorDataType, El::Device::CPU> cpu_matrix_trans(
+      matrix.Width(),
+      matrix.Height(),
+      values.data(),
+      matrix.Width());
+    cpu_matrix.Resize(matrix.Height(), matrix.Width());
+    El::Transpose(cpu_matrix_trans, cpu_matrix.Matrix());
+  }
+
+  // Copy CPU matrix to weights matrix
+  El::Copy(cpu_matrix, matrix);
+#endif // LBANN_HAS_CNPY
+}
+
 template <typename TensorDataType>
 description uniform_initializer<TensorDataType>::get_description() const {
   auto desc = data_type_weights_initializer<TensorDataType>::get_description();
@@ -151,6 +252,14 @@ build_value_initializer_from_pbuf(google::protobuf::Message const& msg) {
   return make_unique<value_initializer<TensorDataType>>(parse_list<TensorDataType>(params.values()));
 }
 
+template <typename TensorDataType>
+std::unique_ptr<weights_initializer>
+build_numpy_initializer_from_pbuf(google::protobuf::Message const& msg) {
+  using NumpyMsg = lbann_data::Initializer::NumpyInitializer;
+  auto const& numpy_params = dynamic_cast<NumpyMsg const&>(msg); // throws std::bad_cast on mismatch
+  return make_unique<numpy_initializer<TensorDataType>>(numpy_params.file());
+}
+
 template <typename TensorDataType>
 std::unique_ptr<weights_initializer>
 build_uniform_initializer_from_pbuf(google::protobuf::Message const& msg) {
@@ -184,6 +293,7 @@ build_normal_initializer_from_pbuf(google::protobuf::Message const& msg) {
   template class data_type_weights_initializer<T>;                           \
   template class constant_initializer<T>;                                    \
   template class value_initializer<T>;                                       \
+  template class numpy_initializer<T>;                                       \
   template class uniform_initializer<T>;                                     \
   template class normal_initializer<T>;                                      \
   template std::unique_ptr<weights_initializer>                              \
@@ -191,6 +301,8 @@ build_normal_initializer_from_pbuf(google::protobuf::Message const& msg) {
   template std::unique_ptr<weights_initializer>                              \
   build_value_initializer_from_pbuf<T>(google::protobuf::Message const&);    \
   template std::unique_ptr<weights_initializer>                              \
+  build_numpy_initializer_from_pbuf<T>(google::protobuf::Message const&);    \
+  template std::unique_ptr<weights_initializer>                              \
   build_uniform_initializer_from_pbuf<T>(google::protobuf::Message const&);  \
   template std::unique_ptr<weights_initializer>                              \
   build_normal_initializer_from_pbuf<T>(google::protobuf::Message const&)

From 2ae621ad771d4af9a0787987396b74f1c49b9a92 Mon Sep 17 00:00:00 2001
From: Michael Wyatt <wyatt5@llnl.gov>
Date: Thu, 28 Oct 2021 14:10:57 -0700
Subject: [PATCH 25/37] Rename LBANN options macros (#1991)

* ran script to prepend LBANN_OPTION_ to all option macros

* fixed options that auto-rename script missed

* fixed options that already had prepended LBANN_OPTION_

* fixed whitespace changes

* restored accidentally removed section of file

* reverting some unintended changes from auto rename script
---
 include/lbann/callbacks/perturb_weights.hpp   | 204 ++++-----
 include/lbann/data_readers/data_reader.hpp    |   2 +-
 .../data_reader_sample_list_impl.hpp          |   4 +-
 .../lbann/data_readers/sample_list_impl.hpp   |   2 +-
 include/lbann/utils/options.hpp               | 218 +++++-----
 model_zoo/jag_utils/build_index.cpp           |  12 +-
 .../jag_utils/build_sample_id_mapping.cpp     |   2 +-
 .../jag_utils/check_for_duplicate_samples.cpp |   4 +-
 model_zoo/jag_utils/check_images.cpp          |   8 +-
 .../jag_utils/compute_hydra_normalization.cpp |   6 +-
 .../jag_utils/compute_min_max_images.cpp      |  12 +-
 .../compute_per_channel_image_avg_min_max.cpp |   6 +-
 model_zoo/jag_utils/convert.cpp               |  12 +-
 .../jag_utils/convert_npz_to_conduit.cpp      |   4 +-
 model_zoo/jag_utils/detect_corruption.cpp     |   4 +-
 .../jag_utils/extract_random_samples.cpp      |  34 +-
 .../jag_utils/generate_corrupt_samples.cpp    |   4 +-
 model_zoo/jag_utils/load_balance.cpp          |  18 +-
 model_zoo/jag_utils/load_bundle2raw.cpp       |  10 +-
 model_zoo/jag_utils/select_samples.cpp        |  66 +--
 model_zoo/jag_utils/test_conduit_hdf5.cpp     |   4 +-
 model_zoo/jag_utils/test_reading_speed.cpp    |   8 +-
 model_zoo/lbann.cpp                           |  16 +-
 model_zoo/lbann_cycgan.cpp                    |   4 +-
 model_zoo/tests/conduit_timing_test.cpp       |   2 +-
 src/callbacks/perturb_weights.cpp             | 390 +++++++++---------
 src/callbacks/print_statistics.cpp            |   2 +-
 src/callbacks/timer.cpp                       |   2 +-
 src/data_coordinator/data_coordinator.cpp     |   8 +-
 src/data_readers/data_reader.cpp              |  10 +-
 src/data_readers/data_reader_HDF5.cpp         |   4 +-
 src/data_readers/data_reader_hdf5_legacy.cpp  |   4 +-
 src/data_readers/data_reader_image.cpp        |  20 +-
 src/data_readers/data_reader_jag_conduit.cpp  |  12 +-
 .../data_reader_npz_ras_lipid.cpp             |  20 +-
 .../data_reader_numpy_npz_conduit.cpp         |  10 +-
 src/data_readers/data_reader_smiles.cpp       |  12 +-
 src/data_store/data_store_conduit.cpp         |  28 +-
 src/layers/data_type_layer.cpp                |   2 +-
 src/optimizers/adam.cpp                       |   2 +-
 src/optimizers/sgd.cpp                        |   2 +-
 src/proto/factories/layer_factory.cpp         |   8 +-
 src/proto/proto_common.cpp                    |  84 ++--
 src/utils/lbann_library.cpp                   |  36 +-
 src/utils/options.cpp                         | 220 +++++-----
 src/utils/protobuf_utils.cpp                  |  12 +-
 src/utils/stack_profiler.cpp                  |   4 +-
 src/weights/data_type_weights.cpp             |   2 +-
 48 files changed, 779 insertions(+), 781 deletions(-)

diff --git a/include/lbann/callbacks/perturb_weights.hpp b/include/lbann/callbacks/perturb_weights.hpp
index 65c624470ed..528e9c81449 100644
--- a/include/lbann/callbacks/perturb_weights.hpp
+++ b/include/lbann/callbacks/perturb_weights.hpp
@@ -1,102 +1,102 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef LBANN_CALLBACKS_CALLBACK_PERTURB_WEIGHTS_HPP_INCLUDED
-#define LBANN_CALLBACKS_CALLBACK_PERTURB_WEIGHTS_HPP_INCLUDED
-
-#include "lbann/callbacks/callback.hpp"
-#include "lbann/weights/weights.hpp"
-
-namespace lbann {
-namespace callback {
-
-/** @brief Perturb values in a weights tensor.
- *
- *  Each entry of the weights tensor has a probability of being
- *  perturbed by a normal random number. The resulting values are
- *  clamped within a range.
- */
-class perturb_weights : public callback_base {
-public:
-
-  /**
-   *  @param batch_interval Number of training mini-batch steps
-   *                        between perturbations
-   *  @param output_name    Name of weights being perturbed
-   *  @param upper          Upper bound for weights values
-   *  @param lower          Lower bound for weights values
-   *  @param scale          Standard deviation of normal perturbations
-   *  @param perturb_probability    Probability of applying
-   *                        perturbation to a given weights value
-   */
-  perturb_weights(EvalType upper, EvalType lower, EvalType scale, EvalType perturb_probability,
-		  std::string output_name,
-                  El::Int batch_interval = 1);
-
-  perturb_weights* copy() const override { return new perturb_weights(*this); }
-  std::string name() const override { return "perturb weights"; }
-
-  void setup(model* m) override;
-  void on_batch_begin(model* m) override;
-
-  /** @name Serialization */
-  ///@{
-
-  /** @brief Store state to archive for checkpoint and restart */
-  template <class Archive> void serialize(Archive & ar);
-
-  ///@}
-
-private:
-
-  friend class cereal::access;
-  perturb_weights();
-
-  /// @brief Name of weights being perturbed
-  std::string m_output_name;
-
-  /// @brief Upper bound for weights values
-  EvalType m_upper;
-  /// @brief Lower bound for weights values
-  EvalType m_lower;
-  /// @brief Standard deviation of normal perturbations
-  EvalType m_scale;
-  /// @brief Probability of applying perturbation to a given value
-  EvalType m_perturb_probability;
-
-  void perturb(model& m);
-
-};
-
-// Builder function
-std::unique_ptr<callback_base>
-build_perturb_weights_callback_from_pbuf(
-  const google::protobuf::Message&, std::shared_ptr<lbann_summary> const&);
-
-} // namespace callback
-} // namespace lbann
-
-#endif // LBANN_CALLBACKS_CALLBACK_PERTURB_WEIGHTS_HPP_INCLUDED
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef LBANN_CALLBACKS_CALLBACK_PERTURB_WEIGHTS_HPP_INCLUDED
+#define LBANN_CALLBACKS_CALLBACK_PERTURB_WEIGHTS_HPP_INCLUDED
+
+#include "lbann/callbacks/callback.hpp"
+#include "lbann/weights/weights.hpp"
+
+namespace lbann {
+namespace callback {
+
+/** @brief Perturb values in a weights tensor.
+ *
+ *  Each entry of the weights tensor has a probability of being
+ *  perturbed by a normal random number. The resulting values are
+ *  clamped within a range.
+ */
+class perturb_weights : public callback_base {
+public:
+
+  /**
+   *  @param batch_interval Number of training mini-batch steps
+   *                        between perturbations
+   *  @param output_name    Name of weights being perturbed
+   *  @param upper          Upper bound for weights values
+   *  @param lower          Lower bound for weights values
+   *  @param scale          Standard deviation of normal perturbations
+   *  @param perturb_probability    Probability of applying
+   *                        perturbation to a given weights value
+   */
+  perturb_weights(EvalType upper, EvalType lower, EvalType scale, EvalType perturb_probability,
+		  std::string output_name,
+                  El::Int batch_interval = 1);
+
+  perturb_weights* copy() const override { return new perturb_weights(*this); }
+  std::string name() const override { return "perturb weights"; }
+
+  void setup(model* m) override;
+  void on_batch_begin(model* m) override;
+
+  /** @name Serialization */
+  ///@{
+
+  /** @brief Store state to archive for checkpoint and restart */
+  template <class Archive> void serialize(Archive & ar);
+
+  ///@}
+
+private:
+
+  friend class cereal::access;
+  perturb_weights();
+
+  /// @brief Name of weights being perturbed
+  std::string m_output_name;
+
+  /// @brief Upper bound for weights values
+  EvalType m_upper;
+  /// @brief Lower bound for weights values
+  EvalType m_lower;
+  /// @brief Standard deviation of normal perturbations
+  EvalType m_scale;
+  /// @brief Probability of applying perturbation to a given value
+  EvalType m_perturb_probability;
+
+  void perturb(model& m);
+
+};
+
+// Builder function
+std::unique_ptr<callback_base>
+build_perturb_weights_callback_from_pbuf(
+  const google::protobuf::Message&, std::shared_ptr<lbann_summary> const&);
+
+} // namespace callback
+} // namespace lbann
+
+#endif // LBANN_CALLBACKS_CALLBACK_PERTURB_WEIGHTS_HPP_INCLUDED
diff --git a/include/lbann/data_readers/data_reader.hpp b/include/lbann/data_readers/data_reader.hpp
index 8f804b696b8..2079111395d 100644
--- a/include/lbann/data_readers/data_reader.hpp
+++ b/include/lbann/data_readers/data_reader.hpp
@@ -74,7 +74,7 @@ class generic_data_reader {
    * ctor
    */
   generic_data_reader(bool shuffle = true)
-    : m_verbose(global_argument_parser().get<bool>(VERBOSE)),
+    : m_verbose(global_argument_parser().get<bool>(LBANN_OPTION_VERBOSE)),
       m_data_store(nullptr),
       m_comm(nullptr),
       m_mini_batch_size(0),
diff --git a/include/lbann/data_readers/data_reader_sample_list_impl.hpp b/include/lbann/data_readers/data_reader_sample_list_impl.hpp
index 748d90ac48e..3659582fbc2 100644
--- a/include/lbann/data_readers/data_reader_sample_list_impl.hpp
+++ b/include/lbann/data_readers/data_reader_sample_list_impl.hpp
@@ -105,7 +105,7 @@ void data_reader_sample_list<SampleListT>::load_list_of_samples(
 
   // dah: I've not a clue what this next block does;
   //      is it a hack that should come out?
-  if (this->m_keep_sample_order || arg_parser.get<bool>(KEEP_SAMPLE_ORDER)) {
+  if (this->m_keep_sample_order || arg_parser.get<bool>(LBANN_OPTION_KEEP_SAMPLE_ORDER)) {
     m_sample_list.keep_sample_order(true);
   }
   else {
@@ -113,7 +113,7 @@ void data_reader_sample_list<SampleListT>::load_list_of_samples(
   }
 
   // Load the sample list
-  if (arg_parser.get<bool>(LOAD_FULL_SAMPLE_LIST_ONCE)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_LOAD_FULL_SAMPLE_LIST_ONCE)) {
     std::vector<char> buffer;
     if (m_comm->am_trainer_master()) {
       load_file(sample_list_file, buffer);
diff --git a/include/lbann/data_readers/sample_list_impl.hpp b/include/lbann/data_readers/sample_list_impl.hpp
index 8fa664f5b62..facbf489271 100644
--- a/include/lbann/data_readers/sample_list_impl.hpp
+++ b/include/lbann/data_readers/sample_list_impl.hpp
@@ -492,7 +492,7 @@ inline void sample_list<sample_name_t>
 ::all_gather_archive(const std::string &archive,
                      std::vector<std::string>& gathered_archive,
                      lbann_comm& comm) {
-  if (!global_argument_parser().get<bool>(ALL_GATHER_OLD)) {
+  if (!global_argument_parser().get<bool>(LBANN_OPTION_ALL_GATHER_OLD)) {
     all_gather_archive_new(archive, gathered_archive, comm);
     return;
   }
diff --git a/include/lbann/utils/options.hpp b/include/lbann/utils/options.hpp
index d0836c541b2..8f6d0ec30d1 100644
--- a/include/lbann/utils/options.hpp
+++ b/include/lbann/utils/options.hpp
@@ -12,133 +12,133 @@ namespace lbann {
 
 /****** std options ******/
 // Bool flags
-#define DISABLE_BACKGROUND_IO_ACTIVITY "disable_background_io_activity"
-#define DISABLE_CUDA "disable_cuda"
-#define LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE "load_model_weights_dir_is_complete"
-#define LTFB_ALLOW_GLOBAL_STATISTICS "LTFB Allow global statistics"
-#define LTFB_VERBOSE "ltfb_verbose"
-#define NO_IM_COMM "no_im_comm"
-#define PRELOAD_DATA_STORE "preload_data_store"
-#define PRINT_AFFINITY "print_affinity"
-#define SERIALIZE_IO "serialize_io"
-#define ST_FULL_TRACE "st_full_trace"
-#define ST_ON "st_on"
-#define USE_CUBLAS_TENSOR_OPS "use_cublas_tensor_ops"
-#define USE_CUDNN_TENSOR_OPS "use_cudnn_tensor_ops"
-#define USE_DATA_STORE "use_data_store"
-#define USE_LTFB "ltfb"
-#define VERBOSE "verbose"
-#define WRITE_SAMPLE_LIST "write_sample_list"
-#define USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP "Use Hydrogen's default memory mode for GPU buffers in forward prop"
+#define LBANN_OPTION_DISABLE_BACKGROUND_IO_ACTIVITY "disable_background_io_activity"
+#define LBANN_OPTION_DISABLE_CUDA "disable_cuda"
+#define LBANN_OPTION_LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE "load_model_weights_dir_is_complete"
+#define LBANN_OPTION_LTFB_ALLOW_GLOBAL_STATISTICS "LTFB Allow global statistics"
+#define LBANN_OPTION_LTFB_VERBOSE "ltfb_verbose"
+#define LBANN_OPTION_NO_IM_COMM "no_im_comm"
+#define LBANN_OPTION_PRELOAD_DATA_STORE "preload_data_store"
+#define LBANN_OPTION_PRINT_AFFINITY "print_affinity"
+#define LBANN_OPTION_SERIALIZE_IO "serialize_io"
+#define LBANN_OPTION_ST_FULL_TRACE "st_full_trace"
+#define LBANN_OPTION_ST_ON "st_on"
+#define LBANN_OPTION_USE_CUBLAS_TENSOR_OPS "use_cublas_tensor_ops"
+#define LBANN_OPTION_USE_CUDNN_TENSOR_OPS "use_cudnn_tensor_ops"
+#define LBANN_OPTION_USE_DATA_STORE "use_data_store"
+#define LBANN_OPTION_USE_LTFB "ltfb"
+#define LBANN_OPTION_VERBOSE "verbose"
+#define LBANN_OPTION_WRITE_SAMPLE_LIST "write_sample_list"
+#define LBANN_OPTION_USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP "Use Hydrogen's default memory mode for GPU buffers in forward prop"
 #define LBANN_OPTION_INIT_SHMEM "Initialize SHMEM when initializing LBANN"
 #define LBANN_OPTION_INIT_NVSHMEM "Initialize NVSHMEM when initializing LBANN"
 
 // Input options
-#define CKPT_DIR "ckpt_dir"
-#define HYDROGEN_BLOCK_SIZE "hydrogen_block_size"
-#define LOAD_MODEL_WEIGHTS_DIR "load_model_weights_dir"
-#define MAX_RNG_SEEDS_DISPLAY "RNG seeds per trainer to display"
-#define METADATA "metadata"
-#define MINI_BATCH_SIZE "mini_batch_size"
-#define MODEL "model"
-#define NUM_EPOCHS "num_epochs"
-#define NUM_IO_THREADS "Num. IO threads"
-#define NUM_PARALLEL_READERS "num_parallel_readers"
-#define NUM_TEST_SAMPLES "Num test samples"
-#define NUM_TRAIN_SAMPLES "Num train samples"
-#define NUM_VALIDATE_SAMPLES "Num validate samples"
-#define OPTIMIZER "optimizer"
-#define PROCS_PER_TRAINER "Processes per trainer"
-#define PROTOTEXT "prototext"
-#define RANDOM_SEED "random_seed"
-#define READER "reader"
-#define RESTART_DIR "restart_dir"
-#define TRAINER_CREATE_TWO_MODELS "Create two models in Sub-grid parallelism"
-#define TRAINER_GRID_HEIGHT "Height of 2D process grid for each trainer"
-#define TRAINER_PRIMARY_GRID_SIZE "Primary Grid Size per trainer"
+#define LBANN_OPTION_CKPT_DIR "ckpt_dir"
+#define LBANN_OPTION_HYDROGEN_BLOCK_SIZE "hydrogen_block_size"
+#define LBANN_OPTION_LOAD_MODEL_WEIGHTS_DIR "load_model_weights_dir"
+#define LBANN_OPTION_MAX_RNG_SEEDS_DISPLAY "RNG seeds per trainer to display"
+#define LBANN_OPTION_METADATA "metadata"
+#define LBANN_OPTION_MINI_BATCH_SIZE "mini_batch_size"
+#define LBANN_OPTION_MODEL "model"
+#define LBANN_OPTION_NUM_EPOCHS "num_epochs"
+#define LBANN_OPTION_NUM_IO_THREADS "Num. IO threads"
+#define LBANN_OPTION_NUM_PARALLEL_READERS "num_parallel_readers"
+#define LBANN_OPTION_NUM_TEST_SAMPLES "Num test samples"
+#define LBANN_OPTION_NUM_TRAIN_SAMPLES "Num train samples"
+#define LBANN_OPTION_NUM_VALIDATE_SAMPLES "Num validate samples"
+#define LBANN_OPTION_OPTIMIZER "optimizer"
+#define LBANN_OPTION_PROCS_PER_TRAINER "Processes per trainer"
+#define LBANN_OPTION_PROTOTEXT "prototext"
+#define LBANN_OPTION_RANDOM_SEED "random_seed"
+#define LBANN_OPTION_READER "reader"
+#define LBANN_OPTION_RESTART_DIR "restart_dir"
+#define LBANN_OPTION_TRAINER_CREATE_TWO_MODELS "Create two models in Sub-grid parallelism"
+#define LBANN_OPTION_TRAINER_GRID_HEIGHT "Height of 2D process grid for each trainer"
+#define LBANN_OPTION_TRAINER_PRIMARY_GRID_SIZE "Primary Grid Size per trainer"
 
 /****** datastore options ******/
 // Bool flags
-#define DATA_STORE_CACHE "data_store_cache"
-#define DATA_STORE_DEBUG "data_store_debug"
-#define DATA_STORE_FAIL "data_store_fail"
-#define DATA_STORE_MIN_MAX_TIMING "data_store_min_max_timing"
-#define DATA_STORE_NO_THREAD "data_store_no_thread"
-#define DATA_STORE_PROFILE "data_store_profile"
-#define DATA_STORE_SPILL "data_store_spill"
-#define DATA_STORE_TEST_CACHE "data_store_test_cache"
-#define DATA_STORE_TEST_CHECKPOINT "data_store_test_checkpoint"
+#define LBANN_OPTION_DATA_STORE_CACHE "data_store_cache"
+#define LBANN_OPTION_DATA_STORE_DEBUG "data_store_debug"
+#define LBANN_OPTION_DATA_STORE_FAIL "data_store_fail"
+#define LBANN_OPTION_DATA_STORE_MIN_MAX_TIMING "data_store_min_max_timing"
+#define LBANN_OPTION_DATA_STORE_NO_THREAD "data_store_no_thread"
+#define LBANN_OPTION_DATA_STORE_PROFILE "data_store_profile"
+#define LBANN_OPTION_DATA_STORE_SPILL "data_store_spill"
+#define LBANN_OPTION_DATA_STORE_TEST_CACHE "data_store_test_cache"
+#define LBANN_OPTION_DATA_STORE_TEST_CHECKPOINT "data_store_test_checkpoint"
 
 /****** datareader options ******/
 // Bool flags
-#define ALL_GATHER_OLD "all_gather_old"
-#define CHECK_DATA "check_data"
-#define CREATE_TARBALL "create_tarball"
-#define DISABLE_SIGNAL_HANDLER "disable_signal_handler"
-#define DEBUG_CONCATENATE "debug_concatenate"
-#define EXIT_AFTER_SETUP "exit_after_setup"
-#define GENERATE_MULTI_PROTO "generate_multi_proto"
-#define KEEP_SAMPLE_ORDER "keep_sample_order"
-#define KEEP_PACKED_FIELDS "keep_packed_fields"
-#define LOAD_FULL_SAMPLE_LIST_ONCE "load_full_sample_list_once"
-#define MAKE_TEST_FAIL "make_test_fail"
-#define NODE_SIZES_VARY "node_sizes_vary"
+#define LBANN_OPTION_ALL_GATHER_OLD "all_gather_old"
+#define LBANN_OPTION_CHECK_DATA "check_data"
+#define LBANN_OPTION_CREATE_TARBALL "create_tarball"
+#define LBANN_OPTION_DISABLE_SIGNAL_HANDLER "disable_signal_handler"
+#define LBANN_OPTION_DEBUG_CONCATENATE "debug_concatenate"
+#define LBANN_OPTION_EXIT_AFTER_SETUP "exit_after_setup"
+#define LBANN_OPTION_GENERATE_MULTI_PROTO "generate_multi_proto"
+#define LBANN_OPTION_KEEP_SAMPLE_ORDER "keep_sample_order"
+#define LBANN_OPTION_KEEP_PACKED_FIELDS "keep_packed_fields"
+#define LBANN_OPTION_LOAD_FULL_SAMPLE_LIST_ONCE "load_full_sample_list_once"
+#define LBANN_OPTION_MAKE_TEST_FAIL "make_test_fail"
+#define LBANN_OPTION_NODE_SIZES_VARY "node_sizes_vary"
 #define LBANN_OPTION_QUIET "quiet"
-#define STACK_TRACE_TO_FILE "stack_trace_to_file"
-#define TEST_ENCODE "test_encode"
-#define WRITE_SAMPLE_LABEL_LIST "write_sample_label_list"
-#define Z_SCORE "z_score"
+#define LBANN_OPTION_STACK_TRACE_TO_FILE "stack_trace_to_file"
+#define LBANN_OPTION_TEST_ENCODE "test_encode"
+#define LBANN_OPTION_WRITE_SAMPLE_LABEL_LIST "write_sample_label_list"
+#define LBANN_OPTION_Z_SCORE "z_score"
 
 // Input options
-#define ABSOLUTE_SAMPLE_COUNT "absolute_sample_count"
-#define DATA_FILEDIR "data_filedir"
-#define DATA_FILEDIR_TEST "data_filedir_test"
-#define DATA_FILEDIR_TRAIN "data_filedir_train"
-#define DATA_FILEDIR_VALIDATE "data_filedir_validate"
-#define DATA_FILENAME_TEST "data_filename_test"
-#define DATA_FILENAME_TRAIN "data_filename_train"
-#define DATA_FILENAME_VALIDATE "data_filename_validate"
-#define DATA_READER_PERCENT "data_reader_percent"
-#define DELIMITER "delimiter"
-#define IMAGE_SIZES_FILENAME "image_sizes_filename"
-#define LABEL_FILENAME_TEST "label_filename_test"
-#define LABEL_FILENAME_TRAIN "label_filename_train"
-#define LABEL_FILENAME_VALIDATE "label_filename_validate"
-#define NORMALIZATION "normalization"
-#define N_LINES "n_lines"
-#define PAD_INDEX "pad_index"
-#define PILOT2_READ_FILE_SIZES "pilot2_read_file_sizes"
-#define PILOT2_SAVE_FILE_SIZES "pilot2_save_file_sizes"
-#define SAMPLE_LIST_TEST "sample_list_test"
-#define SAMPLE_LIST_TRAIN "sample_list_train"
-#define SAMPLE_LIST_VALIDATE "sample_list_validate"
-#define SEQUENCE_LENGTH "sequence_length"
-#define SMILES_BUFFER_SIZE "smiles_buffer_size"
-#define TEST_TARBALL "test_tarball"
-#define VOCAB "vocab"
+#define LBANN_OPTION_ABSOLUTE_SAMPLE_COUNT "absolute_sample_count"
+#define LBANN_OPTION_DATA_FILEDIR "data_filedir"
+#define LBANN_OPTION_DATA_FILEDIR_TEST "data_filedir_test"
+#define LBANN_OPTION_DATA_FILEDIR_TRAIN "data_filedir_train"
+#define LBANN_OPTION_DATA_FILEDIR_VALIDATE "data_filedir_validate"
+#define LBANN_OPTION_DATA_FILENAME_TEST "data_filename_test"
+#define LBANN_OPTION_DATA_FILENAME_TRAIN "data_filename_train"
+#define LBANN_OPTION_DATA_FILENAME_VALIDATE "data_filename_validate"
+#define LBANN_OPTION_DATA_READER_PERCENT "data_reader_percent"
+#define LBANN_OPTION_DELIMITER "delimiter"
+#define LBANN_OPTION_IMAGE_SIZES_FILENAME "image_sizes_filename"
+#define LBANN_OPTION_LABEL_FILENAME_TEST "label_filename_test"
+#define LBANN_OPTION_LABEL_FILENAME_TRAIN "label_filename_train"
+#define LBANN_OPTION_LABEL_FILENAME_VALIDATE "label_filename_validate"
+#define LBANN_OPTION_NORMALIZATION "normalization"
+#define LBANN_OPTION_N_LINES "n_lines"
+#define LBANN_OPTION_PAD_INDEX "pad_index"
+#define LBANN_OPTION_PILOT2_READ_FILE_SIZES "pilot2_read_file_sizes"
+#define LBANN_OPTION_PILOT2_SAVE_FILE_SIZES "pilot2_save_file_sizes"
+#define LBANN_OPTION_SAMPLE_LIST_TEST "sample_list_test"
+#define LBANN_OPTION_SAMPLE_LIST_TRAIN "sample_list_train"
+#define LBANN_OPTION_SAMPLE_LIST_VALIDATE "sample_list_validate"
+#define LBANN_OPTION_SEQUENCE_LENGTH "sequence_length"
+#define LBANN_OPTION_SMILES_BUFFER_SIZE "smiles_buffer_size"
+#define LBANN_OPTION_TEST_TARBALL "test_tarball"
+#define LBANN_OPTION_VOCAB "vocab"
 
 /****** jag options ******/
 // Bool flags
-#define JAG "jag"
-#define JAG_PARTITIONED "jag_partitioned"
+#define LBANN_OPTION_JAG "jag"
+#define LBANN_OPTION_JAG_PARTITIONED "jag_partitioned"
 
 // Input options
-#define BASE_DIR "base_dir"
-#define FILELIST "filelist"
-#define FILENAME "filename"
-#define FORMAT "format"
-#define INDEX_FN "index_fn"
-#define MAPPING_FN "mapping_fn"
-#define NUM_LISTS "num_lists"
-#define NUM_SAMPLES "num_samples"
-#define NUM_SAMPLES_PER_FILE "num_samples_per_file"
-#define NUM_SAMPLES_PER_LIST "num_samples_per_list"
-#define NUM_SUBDIRS "num_subdirs"
-#define OUTPUT_BASE_DIR "output_base_dir"
-#define OUTPUT_BASE_FN "output_base_fn"
-#define OUTPUT_DIR "output_dir"
-#define OUTPUT_FN "output_fn"
-#define SAMPLES_PER_FILE "samples_per_file"
+#define LBANN_OPTION_BASE_DIR "base_dir"
+#define LBANN_OPTION_FILELIST "filelist"
+#define LBANN_OPTION_FILENAME "filename"
+#define LBANN_OPTION_FORMAT "format"
+#define LBANN_OPTION_INDEX_FN "index_fn"
+#define LBANN_OPTION_MAPPING_FN "mapping_fn"
+#define LBANN_OPTION_NUM_LISTS "num_lists"
+#define LBANN_OPTION_NUM_SAMPLES "num_samples"
+#define LBANN_OPTION_NUM_SAMPLES_PER_FILE "num_samples_per_file"
+#define LBANN_OPTION_NUM_SAMPLES_PER_LIST "num_samples_per_list"
+#define LBANN_OPTION_NUM_SUBDIRS "num_subdirs"
+#define LBANN_OPTION_OUTPUT_BASE_DIR "output_base_dir"
+#define LBANN_OPTION_OUTPUT_BASE_FN "output_base_fn"
+#define LBANN_OPTION_OUTPUT_DIR "output_dir"
+#define LBANN_OPTION_OUTPUT_FN "output_fn"
+#define LBANN_OPTION_SAMPLES_PER_FILE "samples_per_file"
 
 void construct_std_options();
 void construct_datastore_options();
diff --git a/model_zoo/jag_utils/build_index.cpp b/model_zoo/jag_utils/build_index.cpp
index 26490bd277b..c482be54fb0 100644
--- a/model_zoo/jag_utils/build_index.cpp
+++ b/model_zoo/jag_utils/build_index.cpp
@@ -83,15 +83,15 @@ int main(int argc, char *argv[]) {
       return EXIT_SUCCESS;
     }
 
-    if (arg_parser.get<std::string>(FILELIST) == "" ||
-        arg_parser.get<std::string>(OUTPUT_FN) == "" ||
-        arg_parser.get<std::string>(BASE_DIR)) {
+    if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "" ||
+        arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_FN) == "" ||
+        arg_parser.get<std::string>(LBANN_OPTION_BASE_DIR) == "") {
       throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: improper invocation; run with no cmd line args for proper invocation");
     }
 
-    const std::string input_fn = arg_parser.get<std::string>(FILELIST);
-    const std::string output_fn = arg_parser.get<std::string>(OUTPUT_FN);
-    const std::string base_dir = arg_parser.get<std::string>(BASE_DIR);
+    const std::string input_fn = arg_parser.get<std::string>(LBANN_OPTION_FILELIST);
+    const std::string output_fn = arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_FN);
+    const std::string base_dir = arg_parser.get<std::string>(LBANN_OPTION_BASE_DIR);
 
     int rank = comm->get_rank_in_world();
     std::stringstream ss;
diff --git a/model_zoo/jag_utils/build_sample_id_mapping.cpp b/model_zoo/jag_utils/build_sample_id_mapping.cpp
index 594dc425a3b..143699a7bf4 100644
--- a/model_zoo/jag_utils/build_sample_id_mapping.cpp
+++ b/model_zoo/jag_utils/build_sample_id_mapping.cpp
@@ -63,7 +63,7 @@ int total = 0;
   // get list of conduit filenames
   if (master) cerr << "reading filelist\n";
   vector<string> filenames;
-  string base_dir = arg_parser.get<std::string>(BASE_DIR);
+  string base_dir = arg_parser.get<std::string>(LBANN_OPTION_BASE_DIR);
   if (base_dir.back() != '/') {
     base_dir += '/';
   }
diff --git a/model_zoo/jag_utils/check_for_duplicate_samples.cpp b/model_zoo/jag_utils/check_for_duplicate_samples.cpp
index c6e08e63794..d3ded7a2b37 100644
--- a/model_zoo/jag_utils/check_for_duplicate_samples.cpp
+++ b/model_zoo/jag_utils/check_for_duplicate_samples.cpp
@@ -72,7 +72,7 @@ int main(int argc, char *argv[]) {
     }
 
     // sanity check invocation
-    if (arg_parser.get<std::string>(FILELIST) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "") {
       if (master) {
         throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: usage: " + argv[0] + " --filelist=<string>");
       }
@@ -80,7 +80,7 @@ int main(int argc, char *argv[]) {
 
     // read list of conduit filenames
     std::vector<std::string> files;
-    const std::string fn = arg_parser.get<std::string>(FILELIST);
+    const std::string fn = arg_parser.get<std::string>(LBANN_OPTION_FILELIST);
     read_filelist(comm.get(), fn, files);
 
     std::unordered_set<std::string> input_names;
diff --git a/model_zoo/jag_utils/check_images.cpp b/model_zoo/jag_utils/check_images.cpp
index cf523e09674..d7c89b2bf0f 100644
--- a/model_zoo/jag_utils/check_images.cpp
+++ b/model_zoo/jag_utils/check_images.cpp
@@ -41,7 +41,7 @@
 using namespace lbann;
 
 #define NUM_OUTPUT_DIRS 100
-#define NUM_SAMPLES_PER_FILE 1000
+#define NUM_SAMPLES_PER_FILE 1000
 
 //==========================================================================
 int main(int argc, char *argv[]) {
@@ -68,18 +68,18 @@ int main(int argc, char *argv[]) {
       std::terminate();
     }
 
-    if (arg_parser.get<std::string>(FILELIST)) {
+    if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "") {
       if (master) {
         throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: usage: " + argv[0] + " --filelist");
       }
     }
 
     std::vector<std::string> files;
-    std::ifstream in(arg_parser.get<std::string>(FILELIST).c_str());
+    std::ifstream in(arg_parser.get<std::string>(LBANN_OPTION_FILELIST).c_str());
     if (!in) {
       throw lbann_exception(std::string{} + __FILE__ + " " +
                             std::to_string(__LINE__) + " :: failed to open " +
-                            arg_parser.get<std::string>(FILELIST) +
+                            arg_parser.get<std::string>(LBANN_OPTION_FILELIST) +
                             " for reading");
     }
     std::string line;
diff --git a/model_zoo/jag_utils/compute_hydra_normalization.cpp b/model_zoo/jag_utils/compute_hydra_normalization.cpp
index d56c2af7fd3..19a2dab7fb6 100644
--- a/model_zoo/jag_utils/compute_hydra_normalization.cpp
+++ b/model_zoo/jag_utils/compute_hydra_normalization.cpp
@@ -76,7 +76,7 @@ int main(int argc, char *argv[]) {
       LBANN_ERROR("failed to open: normalize.txt for writing");
     }
 
-    if (arg_parser.get<std::string>(FILELIST) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "") {
       if (master) {
         throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: usage: " + argv[0] + " --filelist=<string>");
       }
@@ -109,9 +109,9 @@ int main(int argc, char *argv[]) {
       images_v_min[h].resize(MAGIC_NUMBER, DBL_MAX);
     }
 
-    ifstream in(arg_parser.get<std::string>(FILELIST).c_str());
+    ifstream in(arg_parser.get<std::string>(LBANN_OPTION_FILELIST).c_str());
     if (!in) {
-      LBANN_ERROR("failed to open " + arg_parser.get<std::string>(FILELIST) +
+      LBANN_ERROR("failed to open " + arg_parser.get<std::string>(LBANN_OPTION_FILELIST) +
                   " for reading");
     }
 
diff --git a/model_zoo/jag_utils/compute_min_max_images.cpp b/model_zoo/jag_utils/compute_min_max_images.cpp
index 95c4abf22c5..7cbe84dd479 100644
--- a/model_zoo/jag_utils/compute_min_max_images.cpp
+++ b/model_zoo/jag_utils/compute_min_max_images.cpp
@@ -66,18 +66,18 @@ int main(int argc, char *argv[]) {
       std::terminate();
     }
 
-    if (arg_parser.get<std::string>(FILELIST) == "" ||
-        arg_parser.get<std::string>(OUTPUT_DIR) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "" ||
+        arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_DIR) == "") {
       if (master) {
         throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: usage: " + argv[0] + " --filelist=<string> --output_dir=<string>");
       }
     }
 
-    const std::string dir = arg_parser.get<std::string>(OUTPUT_DIR);
+    const std::string dir = arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_DIR);
 
     if (master) {
       std::stringstream s;
-      s << "mkdir -p " << arg_parser.get<std::string>(OUTPUT_DIR);
+      s << "mkdir -p " << arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_DIR);
       int r = system(s.str().c_str());
       if (r != 0) {
         throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: system call failed: " + s.str());
@@ -89,11 +89,11 @@ int main(int argc, char *argv[]) {
     int size;
     if (master) {
       std::stringstream s;
-      std::ifstream in(arg_parser.get<std::string>(FILELIST).c_str());
+      std::ifstream in(arg_parser.get<std::string>(LBANN_OPTION_FILELIST).c_str());
       if (!in) {
         throw lbann_exception(std::string{} + __FILE__ + " " +
                               std::to_string(__LINE__) + " :: failed to open " +
-                              arg_parser.get<std::string>(FILELIST) +
+                              arg_parser.get<std::string>(LBANN_OPTION_FILELIST) +
                               " for reading");
       }
       std::string line;
diff --git a/model_zoo/jag_utils/compute_per_channel_image_avg_min_max.cpp b/model_zoo/jag_utils/compute_per_channel_image_avg_min_max.cpp
index 559d9d7351b..73b9ea24168 100644
--- a/model_zoo/jag_utils/compute_per_channel_image_avg_min_max.cpp
+++ b/model_zoo/jag_utils/compute_per_channel_image_avg_min_max.cpp
@@ -65,7 +65,7 @@ int main(int argc, char *argv[]) {
       std::terminate();
     }
 
-    if (arg_parser.get<std::string>(FILELIST) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "") {
       if (master) {
         throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: usage: " + argv[0] + " --filelist=<string>");
       }
@@ -76,11 +76,11 @@ int main(int argc, char *argv[]) {
     int size;
     if (master) {
       std::stringstream s;
-      std::ifstream in(arg_parser.get<std::string>(FILELIST).c_str());
+      std::ifstream in(arg_parser.get<std::string>(LBANN_OPTION_FILELIST).c_str());
       if (!in) {
         throw lbann_exception(std::string{} + __FILE__ + " " +
                               std::to_string(__LINE__) + " :: failed to open " +
-                              arg_parser.get<std::string>(FILELIST) +
+                              arg_parser.get<std::string>(LBANN_OPTION_FILELIST) +
                               " for reading");
       }
       std::string line;
diff --git a/model_zoo/jag_utils/convert.cpp b/model_zoo/jag_utils/convert.cpp
index 8c930ae306f..abfa0ed0086 100644
--- a/model_zoo/jag_utils/convert.cpp
+++ b/model_zoo/jag_utils/convert.cpp
@@ -74,15 +74,15 @@ int main(int argc, char *argv[]) {
     std::terminate();
   }
 
-  if (arg_parser.get<std::string>(FILELIST) == "" ||
-      arg_parser.get<std::string>(OUTPUT_DIR) == "" ||
-      arg_parser.get<std::string>(FORMAT) == "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "" ||
+      arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_DIR) == "" ||
+      arg_parser.get<std::string>(LBANN_OPTION_FORMAT) == "") {
     LBANN_ERROR("usage: test_speed_hydra_ --filelist=<string> --output_dir=<string> --format=<hdf5|conduit_bin>");
   }
 
-  string filelist = arg_parser.get<std::string>(FILELIST);
-  string format = arg_parser.get<std::string>(FORMAT);
-  string output_dir = arg_parser.get<std::string>(OUTPUT_DIR);
+  string filelist = arg_parser.get<std::string>(LBANN_OPTION_FILELIST);
+  string format = arg_parser.get<std::string>(LBANN_OPTION_FORMAT);
+  string output_dir = arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_DIR);
   stringstream s;
   s << "mkdir -p " << output_dir;
   system(s.str().c_str());
diff --git a/model_zoo/jag_utils/convert_npz_to_conduit.cpp b/model_zoo/jag_utils/convert_npz_to_conduit.cpp
index 32f3664c929..c2175e5e037 100644
--- a/model_zoo/jag_utils/convert_npz_to_conduit.cpp
+++ b/model_zoo/jag_utils/convert_npz_to_conduit.cpp
@@ -62,7 +62,7 @@ int main(int argc, char *argv[]) {
       std::terminate();
     }
 
-    if (arg_parser.get<std::string>(FILELIST) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "") {
       if (master) {
         std::cerr << "usage: " << argv[1] << " --filelist=<string>\n"
                   << "function: converts npz files to conduit\n";
@@ -71,7 +71,7 @@ int main(int argc, char *argv[]) {
       return EXIT_FAILURE;
     }
 
-    const std::string input_fn = arg_parser.get<std::string>(FILELIST);
+    const std::string input_fn = arg_parser.get<std::string>(LBANN_OPTION_FILELIST);
 
     int rank = comm->get_rank_in_world();
     int np = comm->get_procs_in_world();
diff --git a/model_zoo/jag_utils/detect_corruption.cpp b/model_zoo/jag_utils/detect_corruption.cpp
index 8bd61366511..1415df7258e 100644
--- a/model_zoo/jag_utils/detect_corruption.cpp
+++ b/model_zoo/jag_utils/detect_corruption.cpp
@@ -71,13 +71,13 @@ int main(int argc, char *argv[]) {
     }
 
     // sanity check invocation
-    if (arg_parser.get<std::string>(FILELIST) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "") {
       if (master) {
         throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: usage: " + argv[0] + " --filelist=<string> \nwhere: 'filelist' is a file that contains the fully qualified filenames of the conduit *'bundle' files that are to be inspected.\nfunction: attemptsto detect and report currupt files and/or samples within those files.");
       }
     }
 
-    const std::string fn = arg_parser.get<std::string>(FILELIST);
+    const std::string fn = arg_parser.get<std::string>(LBANN_OPTION_FILELIST);
     std::vector<std::string> filenames;
     read_filelist(comm.get(), fn, filenames);
 
diff --git a/model_zoo/jag_utils/extract_random_samples.cpp b/model_zoo/jag_utils/extract_random_samples.cpp
index b3ed834e67d..259e1928a78 100644
--- a/model_zoo/jag_utils/extract_random_samples.cpp
+++ b/model_zoo/jag_utils/extract_random_samples.cpp
@@ -41,8 +41,6 @@
 using namespace lbann;
 
 #define NUM_OUTPUT_DIRS 100
-// TODO MRW
-#define NUM_SAMPLES_PER_FILE 1000
 
 //==========================================================================
 void check_invocation(bool master);
@@ -114,7 +112,7 @@ int main(int argc, char *argv[]) {
     }
 
     // ensure the db contains NUM_SAMPLES_PER_FILE
-    arg_parser.get<int>(NUM_SAMPLES_PER_FILE);
+    arg_parser.get<int>(LBANN_OPTION_NUM_SAMPLES_PER_FILE);
 
     int num_output_dirs;
     if (master) {
@@ -134,7 +132,7 @@ int main(int argc, char *argv[]) {
 
     build_exclusion_set(exclude);
     std::vector<int> indices_v;
-    size_t num_samples = arg_parser.get<int>(NUM_SAMPLES);
+    size_t num_samples = arg_parser.get<int>(LBANN_OPTION_NUM_SAMPLES);
     indices_v.reserve(num_samples);
 
     get_random_sample_indices(exclude, indices, global_num_samples);
@@ -142,7 +140,7 @@ int main(int argc, char *argv[]) {
     if (master) {
       // write set of random indices to file; these can be used
       // as an exclusion set for a subsequent run
-      const std::string base_dir = arg_parser.get<std::string>(OUTPUT_BASE_DIR);
+      const std::string base_dir = arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_BASE_DIR);
       const std::string fn = base_dir + "/random_indices.txt";
       std::ofstream out(fn.c_str());
       if (!out) {
@@ -176,8 +174,8 @@ int main(int argc, char *argv[]) {
 }
 
 void get_random_sample_indices(const std::unordered_set<int> &exclude, std::set<int> &indices, int global_num_samples) {
-  size_t num_samples = options::get()->get_int(NUM_SAMPLES);
-  int seed = options::get()->get_int(RANDOM_SEED);
+  size_t num_samples = options::get()->get_int(LBANN_OPTION_NUM_SAMPLES);
+  int seed = options::get()->get_int(LBANN_OPTION_RANDOM_SEED);
   srand(seed);
   while (indices.size() < num_samples) {
     int v = rand() % global_num_samples;
@@ -227,9 +225,9 @@ void build_exclusion_set(std::unordered_set<int> &exclude) {
 
 int construct_output_directories(int np) {
   auto& arg_parser = global_argument_parser();
-  int num_samples_per_file = arg_parser.get<int>(NUM_SAMPLES_PER_FILE);
-  int num_samples = arg_parser.get<int>(NUM_SAMPLES);
-  const std::string base_dir = options::get()->get_string(OUTPUT_BASE_DIR);
+  int num_samples_per_file = arg_parser.get<int>(LBANN_OPTION_NUM_SAMPLES_PER_FILE);
+  int num_samples = arg_parser.get<int>(LBANN_OPTION_NUM_SAMPLES);
+  const std::string base_dir = options::get()->get_string(LBANN_OPTION_OUTPUT_BASE_DIR);
   int num_output_dirs = ((num_samples / num_samples_per_file + 1) / np) *2;
 
   for (int j=0; j<num_output_dirs; j++) {
@@ -249,7 +247,7 @@ void build_sample_mapping(
   std::vector<std::set<int> > &samples) {
 
   // open index file
-  const std::string index_fn = options::get()->get_string(INDEX_FN);
+  const std::string index_fn = options::get()->get_string(LBANN_OPTION_INDEX_FN);
   std::ifstream in(index_fn);
   if (!in) {
     throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: failed to open " + index_fn + " for reading");
@@ -294,10 +292,10 @@ void build_sample_mapping(
 
 void check_invocation(bool master) {
   auto& arg_parser = global_argument_parser();
-  if (arg_parser.get<std::string>(INDEX_FN) == "" ||
-      arg_parser.get<int>(NUM_SAMPLES) == -1 ||
-      arg_parser.get<std::string>(OUTPUT_BASE_DIR) == "" ||
-      arg_parser.get<int>(RANDOM_SEED) == -1) {
+  if (arg_parser.get<std::string>(LBANN_OPTION_INDEX_FN) == "" ||
+      arg_parser.get<int>(LBANN_OPTION_NUM_SAMPLES) == -1 ||
+      arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_BASE_DIR) == "" ||
+      arg_parser.get<int>(LBANN_OPTION_RANDOM_SEED) == -1) {
     if (master) {
       throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: improper invocation; see usage message below\n\n " + usage() + "\n\n");
     }
@@ -305,7 +303,7 @@ void check_invocation(bool master) {
 }
 
 void get_global_num_samples(int &num_samples, int &num_files) {
-  const std::string index_fn = options::get()->get_string(INDEX_FN);
+  const std::string index_fn = options::get()->get_string(LBANN_OPTION_INDEX_FN);
   std::ifstream in(index_fn);
   if (!in) {
     throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: failed to open " + index_fn + " for reading");
@@ -321,7 +319,7 @@ void extract_samples(
   const std::vector<std::string> &filenames,
   const std::vector<std::set<int> > &samples) {
 
-  const std::string base_dir = options::get()->get_string(OUTPUT_BASE_DIR);
+  const std::string base_dir = options::get()->get_string(LBANN_OPTION_OUTPUT_BASE_DIR);
   char b[1024];
   sprintf(b, "%s/_sample_ids_%d.txt", base_dir.c_str(), rank);
   std::ofstream out_ids(b);
@@ -329,7 +327,7 @@ void extract_samples(
     throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: failed to open " + b + " for writing");
   }
   int num_output_dirs = options::get()->get_int("num_output_dirs");
-  int num_samples_per_file = options::get()->get_int(NUM_SAMPLES_PER_FILE);
+  int num_samples_per_file = options::get()->get_int(LBANN_OPTION_NUM_SAMPLES_PER_FILE);
   int file_id = 0;
   int dir_id = 0;
   int n_samples = 0;
diff --git a/model_zoo/jag_utils/generate_corrupt_samples.cpp b/model_zoo/jag_utils/generate_corrupt_samples.cpp
index d0a162195d3..bba25e849c8 100644
--- a/model_zoo/jag_utils/generate_corrupt_samples.cpp
+++ b/model_zoo/jag_utils/generate_corrupt_samples.cpp
@@ -69,7 +69,7 @@ int main(int argc, char *argv[]) {
   }
 
   // sanity check invocation
-  if (arg_parser.get<std::string>(FILELIST) == "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "") {
     if (master) {
       err << " :: usage: " << argv[0] << " --filelist=<string>\n"
           << "WARNING: this driver deletes the directory 'corrupt_jag_samples' if it exists "
@@ -80,7 +80,7 @@ int main(int argc, char *argv[]) {
 
   // read list of conduit filenames
   std::vector<std::string> files;
-  const std::string fn = arg_parser.get<std::string>(FILELIST);
+  const std::string fn = arg_parser.get<std::string>(LBANN_OPTION_FILELIST);
   read_filelist(comm.get(), fn, files);
 
   int ee = system("rm -rf corrupt_jag_samples");
diff --git a/model_zoo/jag_utils/load_balance.cpp b/model_zoo/jag_utils/load_balance.cpp
index f9628414cef..63ffe741e67 100644
--- a/model_zoo/jag_utils/load_balance.cpp
+++ b/model_zoo/jag_utils/load_balance.cpp
@@ -64,18 +64,18 @@ int main(int argc, char *argv[]) {
     }
 
     // sanity check invocation
-    if (arg_parser.get<std::string>(FILELIST) == "" ||
-        arg_parser.get<std::string>(OUTPUT_BASE_DIR) == "" ||
-        arg_parser.get<int>(NUM_SUBDIRS) == -1 ||
-        arg_parser.get<int>(SAMPLES_PER_FILE) == -1) {
+    if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "" ||
+        arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_BASE_DIR) == "" ||
+        arg_parser.get<int>(LBANN_OPTION_NUM_SUBDIRS) == -1 ||
+        arg_parser.get<int>(LBANN_OPTION_SAMPLES_PER_FILE) == -1) {
       if (master) {
         throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: usage: " + argv[0] + " --filelist=<string> --output_base_dir=<string> --num_subdirs=<int> --samples_per_file=<int>");
       }
     }
 
-    const int num_dirs = arg_parser.get<int>(NUM_SUBDIRS);
-    const std::string base = arg_parser.get<std::string>(OUTPUT_BASE_DIR);
-    const int samples_per_file = arg_parser.get<int>(SAMPLES_PER_FILE);
+    const int num_dirs = arg_parser.get<int>(LBANN_OPTION_NUM_SUBDIRS);
+    const std::string base = arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_BASE_DIR);
+    const int samples_per_file = arg_parser.get<int>(LBANN_OPTION_SAMPLES_PER_FILE);
 
     // master creates output directory structure
     if (master) {
@@ -98,11 +98,11 @@ int main(int argc, char *argv[]) {
     int size;
     if (master) {
       std::stringstream s;
-      std::ifstream in(arg_parser.get<std::string>(FILELIST).c_str());
+      std::ifstream in(arg_parser.get<std::string>(LBANN_OPTION_FILELIST).c_str());
       if (!in) {
         throw lbann_exception(std::string{} + __FILE__ + " " +
                               std::to_string(__LINE__) + " :: failed to open " +
-                              arg_parser.get<std::string>(FILELIST) +
+                              arg_parser.get<std::string>(LBANN_OPTION_FILELIST) +
                               " for reading");
       }
       std::string line;
diff --git a/model_zoo/jag_utils/load_bundle2raw.cpp b/model_zoo/jag_utils/load_bundle2raw.cpp
index 96c1dc43f2a..7d5f7fb238b 100644
--- a/model_zoo/jag_utils/load_bundle2raw.cpp
+++ b/model_zoo/jag_utils/load_bundle2raw.cpp
@@ -70,18 +70,18 @@ int main(int argc, char *argv[]) {
       std::terminate();
     }
 
-    if (arg_parser.get<std::string>(FILELIST) == "" ||
-        arg_parser.get<std::string>(OUTPUT_DIR) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "" ||
+        arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_DIR) == "") {
       if (master) {
         throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: usage: " + argv[0] + " --filelist=<string> --output_dir=<string>");
       }
     }
 
-    const std::string dir = arg_parser.get<std::string>(OUTPUT_DIR);
+    const std::string dir = arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_DIR);
 
     if (master) {
       std::stringstream s;
-      s << "mkdir -p " << arg_parser.get<std::string>(OUTPUT_DIR);
+      s << "mkdir -p " << arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_DIR);
       int r = system(s.str().c_str());
       if (r != 0) {
         throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: system call failed: " + s.str());
@@ -89,7 +89,7 @@ int main(int argc, char *argv[]) {
     }
 
     std::vector<std::string> files;
-    const std::string fn = arg_parser.get<std::string>(FILELIST);
+    const std::string fn = arg_parser.get<std::string>(LBANN_OPTION_FILELIST);
     read_filelist(comm.get(), fn, files);
 
     std::vector<std::string> scalar_names;
diff --git a/model_zoo/jag_utils/select_samples.cpp b/model_zoo/jag_utils/select_samples.cpp
index f9914752eb3..1683cd69624 100644
--- a/model_zoo/jag_utils/select_samples.cpp
+++ b/model_zoo/jag_utils/select_samples.cpp
@@ -125,8 +125,8 @@ int main(int argc, char **argv) {
     std::unordered_map<std::string, std::string> filename_data;
     build_index_maps(index_map_keep, index_map_exclude, string_to_index, filename_data);
 
-    // partition the randomly selected samples into NUM_LISTS sets
-    int num_lists = arg_parser.get<int>(NUM_LISTS);
+    // partition the randomly selected samples into LBANN_OPTION_NUM_LISTS sets
+    int num_lists = arg_parser.get<int>(LBANN_OPTION_NUM_LISTS);
     vector<unordered_map<string, unordered_set<int>>> subsets(num_lists);
     divide_selected_samples(index_map_keep, subsets);
 
@@ -140,7 +140,7 @@ int main(int argc, char **argv) {
     cout << "SUCESS - FINISHED!\n";
 
   } catch (lbann::exception& e) {
-    if (options::get()->get_bool(STACK_TRACE_TO_FILE)) {
+    if (options::get()->get_bool(LBANN_OPTION_STACK_TRACE_TO_FILE)) {
       ostringstream ss("stack_trace");
       const auto& rank = lbann::get_rank_in_world();
       if (rank >= 0) {
@@ -164,33 +164,33 @@ int main(int argc, char **argv) {
 void check_cmd_line() {
   auto& arg_parser = global_argument_parser();
   stringstream err;
-  if (!(arg_parser.get<std::string>(INDEX_FN) != "" &&
-        arg_parser.get<std::string>(MAPPING_FN) != "" &&
-        arg_parser.get<int>(NUM_SAMPLES_PER_LIST) != -1 &&
-        arg_parser.get<int>(NUM_LISTS) != -1 &&
-        arg_parser.get<int>(RANDOM_SEED) != -1 &&
-        arg_parser.get<std::string>(OUTPUT_DIR) != "" &&
-        arg_parser.get<std::string>(OUTPUT_BASE_FN) != "")) {
+  if (!(arg_parser.get<std::string>(LBANN_OPTION_INDEX_FN) != "" &&
+        arg_parser.get<std::string>(LBANN_OPTION_MAPPING_FN) != "" &&
+        arg_parser.get<int>(LBANN_OPTION_NUM_SAMPLES_PER_LIST) != -1 &&
+        arg_parser.get<int>(LBANN_OPTION_NUM_LISTS) != -1 &&
+        arg_parser.get<int>(LBANN_OPTION_RANDOM_SEED) != -1 &&
+        arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_DIR) != "" &&
+        arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_BASE_FN) != "")) {
     cout << help_msg();
-    if (arg_parser.get<std::string>(INDEX_FN) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_INDEX_FN) == "") {
       cout << "missing --index_fn=<string> \n";
     }
-    if (arg_parser.get<std::string>(MAPPING_FN) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_MAPPING_FN) == "") {
       cout << "missing --mapping_fn=<string> \n";
     }
-    if (arg_parser.get<int>(NUM_SAMPLES_PER_LIST) == -1) {
+    if (arg_parser.get<int>(LBANN_OPTION_NUM_SAMPLES_PER_LIST) == -1) {
       cout << "missing --num_samples_per_list=<int> \n";
     }
-    if (arg_parser.get<int>(NUM_LISTS) == -1) {
+    if (arg_parser.get<int>(LBANN_OPTION_NUM_LISTS) == -1) {
       cout << "missing --num_lists=<int> \n";
     }
-    if (arg_parser.get<int>(RANDOM_SEED) == -1) {
+    if (arg_parser.get<int>(LBANN_OPTION_RANDOM_SEED) == -1) {
       cout << "missing --random_seed=<int> \n";
     }
-    if (arg_parser.get<std::string>(OUTPUT_DIR) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_DIR) == "") {
       cout << "missing --output_dir=<string> \n";
     }
-    if (arg_parser.get<std::string>(OUTPUT_BASE_FN) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_OUTPUT_BASE_FN) == "") {
       cout << "missing --output_base_fn=<string> \n";
     }
     cout << "\n";
@@ -217,7 +217,7 @@ string help_msg() {
 void read_mapping_file(unordered_map<string, unordered_set<string>> &sample_mapping, unordered_map<string, vector<string>> &sample_mapping_v, unordered_map<string, int>& string_to_index) {
   cout << "starting read_mapping_file\n";
   double tm1 = lbann::get_time();
-  const string mapping_fn = options::get()->get_string(MAPPING_FN);
+  const string mapping_fn = options::get()->get_string(LBANN_OPTION_MAPPING_FN);
   ifstream in(mapping_fn.c_str());
   if (!in) {
     LBANN_ERROR("failed to open ", mapping_fn, " for reading");
@@ -260,12 +260,12 @@ void build_index_maps(
   cout << "starting build_index_maps\n";
   double tm1 = lbann::get_time();
 
-  int samples_per_list = options::get()->get_int(NUM_SAMPLES_PER_LIST);
-  int num_lists = options::get()->get_int(NUM_LISTS);
+  int samples_per_list = options::get()->get_int(LBANN_OPTION_NUM_SAMPLES_PER_LIST);
+  int num_lists = options::get()->get_int(LBANN_OPTION_NUM_LISTS);
   size_t num_samples = samples_per_list * num_lists;
 
   //open input file
-  const string index_fn = options::get()->get_string(INDEX_FN).c_str();
+  const string index_fn = options::get()->get_string(LBANN_OPTION_INDEX_FN).c_str();
   ifstream in(index_fn.c_str());
   if (!in) {
     LBANN_ERROR("failed to open ", index_fn, " for reading");
@@ -288,7 +288,7 @@ void build_index_maps(
   cout << "generating random indices ...\n";
   double tm2 = lbann::get_time();
   unordered_set<int> random_indices;
-  srandom(options::get()->get_int(RANDOM_SEED));
+  srandom(options::get()->get_int(LBANN_OPTION_RANDOM_SEED));
   while (true) {
     int v = random() % num_valid;
     random_indices.insert(v);
@@ -351,7 +351,7 @@ void build_index_maps(
 }
 
 void sanity_test_request() {
-  const string index_fn = options::get()->get_string(INDEX_FN).c_str();
+  const string index_fn = options::get()->get_string(LBANN_OPTION_INDEX_FN).c_str();
   ifstream in(index_fn.c_str());
   if (!in) {
     LBANN_ERROR("failed to open ", index_fn, " for reading");
@@ -365,8 +365,8 @@ void sanity_test_request() {
 
   int num_valid, num_invalid, num_files;
   in >> num_valid >> num_invalid >> num_files;
-  int samples_per_list = options::get()->get_int(NUM_SAMPLES_PER_LIST);
-  int num_lists = options::get()->get_int(NUM_LISTS);
+  int samples_per_list = options::get()->get_int(LBANN_OPTION_NUM_SAMPLES_PER_LIST);
+  int num_lists = options::get()->get_int(LBANN_OPTION_NUM_LISTS);
   int num_samples = samples_per_list * num_lists;
   if (num_samples > num_valid) {
     LBANN_ERROR("you requested a total of ", num_samples, " samples, but only ", num_valid, " are available");
@@ -376,7 +376,7 @@ void sanity_test_request() {
 void divide_selected_samples(
     const unordered_map<string, unordered_set<int>> &index_map_keep,
     vector<unordered_map<string, unordered_set<int>>> &sets) {
-  size_t samples_per_list = options::get()->get_int(NUM_SAMPLES_PER_LIST);
+  size_t samples_per_list = options::get()->get_int(LBANN_OPTION_NUM_SAMPLES_PER_LIST);
   size_t which = 0;
   size_t count = 0;
   size_t total = 0;
@@ -415,8 +415,8 @@ void write_sample_list(
     const vector<unordered_map<string, unordered_set<int>>> &subsets,
     const unordered_map<string, vector<string>> &sample_mapping_v,
     const std::unordered_map<std::string, std::string> &filename_data) {
-  const string dir = options::get()->get_string(OUTPUT_DIR);
-  const string fn = options::get()->get_string(OUTPUT_BASE_FN);
+  const string dir = options::get()->get_string(LBANN_OPTION_OUTPUT_DIR);
+  const string fn = options::get()->get_string(LBANN_OPTION_OUTPUT_BASE_FN);
   stringstream s;
   s << dir << '/' << "t" << n << '_' << fn;
   ofstream out(s.str().c_str());
@@ -489,7 +489,7 @@ void write_sample_list(
     }
   }
 
-  const string base_dir = options::get()->get_string(BASE_DIR);
+  const string base_dir = options::get()->get_string(LBANN_OPTION_BASE_DIR);
 
   out << total_good << " " << total_bad << " " << num_include_files
       << "\n" << base_dir << "\n" << sout.str();
@@ -528,7 +528,7 @@ void make_dir(char *cpath) {
 void test_output_dir() {
   cout << "\nChecking if output diretory path exists;\n"
           " if not, we'll attempt to create it.\n";
-  const string dir = options::get()->get_string(OUTPUT_DIR);
+  const string dir = options::get()->get_string(LBANN_OPTION_OUTPUT_DIR);
   char *cpath = strdup(dir.c_str());
   char *pp = cpath;
   if (pp[0] == '/') {
@@ -571,8 +571,8 @@ void write_bar_files(
 
   unordered_set<string> all_excluded;
 
-  const string dir = options::get()->get_string(OUTPUT_DIR);
-  const string base_fn = options::get()->get_string(OUTPUT_BASE_FN);
+  const string dir = options::get()->get_string(LBANN_OPTION_OUTPUT_DIR);
+  const string base_fn = options::get()->get_string(LBANN_OPTION_OUTPUT_BASE_FN);
   stringstream s;
   s << dir << '/' << "t_exclusion_" << base_fn << "_bar";
   std::cerr << "\nWRITING exclusion bar file: " << s.str() << "\n";
@@ -623,7 +623,7 @@ void write_bar_files(
     }
   }
 
-  const string base_dir = options::get()->get_string(BASE_DIR);
+  const string base_dir = options::get()->get_string(LBANN_OPTION_BASE_DIR);
   out << total_good << " " << total_bad << " " << num_include_files << "\n"
       << base_dir << endl << sout.str();
   out.close();
diff --git a/model_zoo/jag_utils/test_conduit_hdf5.cpp b/model_zoo/jag_utils/test_conduit_hdf5.cpp
index 09db2a23818..bc1898c7432 100644
--- a/model_zoo/jag_utils/test_conduit_hdf5.cpp
+++ b/model_zoo/jag_utils/test_conduit_hdf5.cpp
@@ -73,13 +73,13 @@ int main(int argc, char *argv[]) {
   }
 
   // sanity check invocation
-  if (arg_parser.get<std::string>(FILENAME) == "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_FILENAME) == "") {
     if (master) {
       throw lbann_exception(std::string{} + __FILE__ + " " + std::to_string(__LINE__) + " :: usage: " + argv[0] + " --filename=<string>\ne.g: --filename=/p/lscratchh/brainusr/datasets/conduit_test/from_100M.bundle");
     }
   }
 
-  const std::string filename = arg_parser.get<std::string>(FILENAME);
+  const std::string filename = arg_parser.get<std::string>(LBANN_OPTION_FILENAME);
 
   // get lists of inputs and scalars to read from file
   std::unordered_set<std::string> input_names;
diff --git a/model_zoo/jag_utils/test_reading_speed.cpp b/model_zoo/jag_utils/test_reading_speed.cpp
index bdca301fbbf..9acbab9fe62 100644
--- a/model_zoo/jag_utils/test_reading_speed.cpp
+++ b/model_zoo/jag_utils/test_reading_speed.cpp
@@ -74,15 +74,15 @@ int main(int argc, char *argv[]) {
     std::terminate();
   }
 
-  if (arg_parser.get<std::string>(FILELIST) == "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_FILELIST) == "") {
     LBANN_ERROR("usage: test_speed_hydra_ --filelist=<string> --jag");
   }
 
-  if (arg_parser.get<bool>(JAG)) {
-    test_jag(arg_parser.get<std::string>(FILELIST));
+  if (arg_parser.get<bool>(LBANN_OPTION_JAG)) {
+    test_jag(arg_parser.get<std::string>(LBANN_OPTION_FILELIST));
   }
   else {
-    test_hydra(arg_parser.get<std::string>(FILELIST));
+    test_hydra(arg_parser.get<std::string>(LBANN_OPTION_FILELIST));
   }
   return EXIT_SUCCESS;
 }
diff --git a/model_zoo/lbann.cpp b/model_zoo/lbann.cpp
index abedd83ac5d..dcff9371b0e 100644
--- a/model_zoo/lbann.cpp
+++ b/model_zoo/lbann.cpp
@@ -103,8 +103,8 @@ int main(int argc, char* argv[])
     }
 
     // Setup cuDNN and cuBLAS defaults
-    auto use_cudnn_tensor_ops = arg_parser.get<bool>(USE_CUDNN_TENSOR_OPS);
-    auto use_cublas_tensor_ops = arg_parser.get<bool>(USE_CUBLAS_TENSOR_OPS);
+    auto use_cudnn_tensor_ops = arg_parser.get<bool>(LBANN_OPTION_USE_CUDNN_TENSOR_OPS);
+    auto use_cublas_tensor_ops = arg_parser.get<bool>(LBANN_OPTION_USE_CUBLAS_TENSOR_OPS);
     if (master) {
       std::cout << "Default tensor core settings:\n"
                 << "   cuDNN: " << (use_cudnn_tensor_ops ? "" : "NOT ")
@@ -125,9 +125,9 @@ int main(int argc, char* argv[])
 #endif // LBANN_HAS_CUDA
 
     // this must be called after call to arg_parser.parse();
-    if (!arg_parser.get<bool>(DISABLE_SIGNAL_HANDLER)) {
+    if (!arg_parser.get<bool>(LBANN_OPTION_DISABLE_SIGNAL_HANDLER)) {
       std::string file_base =
-        (arg_parser.get<bool>(STACK_TRACE_TO_FILE) ? "stack_trace" : "");
+        (arg_parser.get<bool>(LBANN_OPTION_STACK_TRACE_TO_FILE) ? "stack_trace" : "");
       stack_trace::register_signal_handler(file_base);
     }
 
@@ -138,7 +138,7 @@ int main(int argc, char* argv[])
     allocate_trainer_resources(comm.get());
 
     int trainer_rank = 0;
-    if (arg_parser.get<bool>(GENERATE_MULTI_PROTO)) {
+    if (arg_parser.get<bool>(LBANN_OPTION_GENERATE_MULTI_PROTO)) {
       trainer_rank = comm->get_trainer_rank();
     }
     // Load the prototexts specificed on the command line
@@ -174,11 +174,11 @@ int main(int argc, char* argv[])
                                  trainer.get_callbacks_with_ownership(),
                                  training_dr_linearized_data_size);
 
-    if (arg_parser.get<bool>(CREATE_TARBALL)) {
+    if (arg_parser.get<bool>(LBANN_OPTION_CREATE_TARBALL)) {
       return EXIT_SUCCESS;
     }
 
-    if (!arg_parser.get<bool>(EXIT_AFTER_SETUP)) {
+    if (!arg_parser.get<bool>(LBANN_OPTION_EXIT_AFTER_SETUP)) {
 
       // Train model
       trainer.train(model.get(), pb_model->num_epochs());
@@ -205,7 +205,7 @@ int main(int argc, char* argv[])
     }
   }
   catch (exception& e) {
-    if (arg_parser.get<bool>(STACK_TRACE_TO_FILE)) {
+    if (arg_parser.get<bool>(LBANN_OPTION_STACK_TRACE_TO_FILE)) {
       std::ostringstream ss("stack_trace");
       const auto& rank = get_rank_in_world();
       if (rank >= 0) {
diff --git a/model_zoo/lbann_cycgan.cpp b/model_zoo/lbann_cycgan.cpp
index 919a7e90839..5e059b95d3b 100644
--- a/model_zoo/lbann_cycgan.cpp
+++ b/model_zoo/lbann_cycgan.cpp
@@ -101,9 +101,9 @@ int main(int argc, char *argv[]) {
       return EXIT_SUCCESS;
     }
 
-    if (!arg_parser.get<bool>(DISABLE_SIGNAL_HANDLER)) {
+    if (!arg_parser.get<bool>(LBANN_OPTION_DISABLE_SIGNAL_HANDLER)) {
       std::string file_base =
-        (arg_parser.get<bool>(STACK_TRACE_TO_FILE) ? "stack_trace" : "");
+        (arg_parser.get<bool>(LBANN_OPTION_STACK_TRACE_TO_FILE) ? "stack_trace" : "");
       stack_trace::register_signal_handler(file_base);
     }
 
diff --git a/model_zoo/tests/conduit_timing_test.cpp b/model_zoo/tests/conduit_timing_test.cpp
index 3d328f8f89a..6c03e655710 100644
--- a/model_zoo/tests/conduit_timing_test.cpp
+++ b/model_zoo/tests/conduit_timing_test.cpp
@@ -91,7 +91,7 @@ int main(int argc, char *argv[]) {
     return EXIT_SUCCESS;
   }
 
-  const std::string input_fn = arg_parser.get<std::string>(FILELIST);
+  const std::string input_fn = arg_parser.get<std::string>(LBANN_OPTION_FILELIST);
   std::vector<std::string> filenames;
   read_filelist(comm, input_fn, filenames);
 
diff --git a/src/callbacks/perturb_weights.cpp b/src/callbacks/perturb_weights.cpp
index a116bc7a49b..dda87a6da48 100644
--- a/src/callbacks/perturb_weights.cpp
+++ b/src/callbacks/perturb_weights.cpp
@@ -1,195 +1,195 @@
-////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-// Produced at the Lawrence Livermore National Laboratory.
-// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
-// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
-//
-// LLNL-CODE-697807.
-// All rights reserved.
-//
-// This file is part of LBANN: Livermore Big Artificial Neural Network
-// Toolkit. For details, see http://software.llnl.gov/LBANN or
-// https://github.com/LLNL/LBANN.
-//
-// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
-// may not use this file except in compliance with the License.  You may
-// obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-// implied. See the License for the specific language governing
-// permissions and limitations under the license.
-////////////////////////////////////////////////////////////////////////////////
-
-#include "lbann/comm_impl.hpp"
-#include "lbann/callbacks/perturb_weights.hpp"
-#include "lbann/proto/proto_common.hpp"
-#include "lbann/utils/serialize.hpp"
-#include "lbann/weights/data_type_weights.hpp"
-
-#include <callbacks.pb.h>
-
-#include <algorithm>
-
-namespace lbann {
-namespace callback {
-
-perturb_weights::perturb_weights(
-  EvalType upper,
-  EvalType lower,
-  EvalType scale,
-  EvalType perturb_probability,
-  std::string output_name,
-  El::Int batch_interval)
-  : callback_base(batch_interval),
-    m_output_name(std::move(output_name)),
-    m_upper(upper),
-    m_lower(lower),
-    m_scale(scale),
-    m_perturb_probability(perturb_probability)
-{}
-
-perturb_weights::perturb_weights()
-  : perturb_weights(0,0,0,0,"",0)
-{}
-
-template <class Archive>
-void perturb_weights::serialize(Archive & ar) {
-  ar(::cereal::make_nvp(
-       "BaseCallback",
-       ::cereal::base_class<callback_base>(this)),
-     CEREAL_NVP(m_output_name),
-     CEREAL_NVP(m_upper),
-     CEREAL_NVP(m_lower),
-     CEREAL_NVP(m_scale),
-     CEREAL_NVP(m_perturb_probability));
-}
-
-void perturb_weights::setup(model* m) {
-   weights* m_output = nullptr;
-
-   for (auto* w : m->get_weights()) {
-      if(w->get_name() == m_output_name){
-        m_output = w;
-        break;
-      }
-   }
-    if (m_output == nullptr) {
-      LBANN_ERROR("Current implementation of callback \"", name(), "\" "
-                  "requires a weight object to perturb");
-    }
-}
-
-void perturb_weights::on_batch_begin(model* m) {
-  const auto& c = m->get_execution_context();
-  weights* m_output = nullptr;
-
-  for (auto* w : m->get_weights()) {
-      if(w->get_name() == m_output_name){
-        m_output = w;
-        break;
-      }
-   }
-
-  if (m_output != nullptr &&
-      c.get_step() % m_batch_interval == 0 &&
-      c.get_execution_mode() == execution_mode::training) {
-    perturb(*m);
-  }
-}
-
-void perturb_weights::perturb(model& m){
-
-  auto* comm = m.get_comm();
-
-  // Useful constants
-  constexpr DataType zero = 0;
-  constexpr DataType one = 1;
-  DataType lower = m_lower;
-  DataType upper = m_upper;
-  DataType scale = m_scale;
-  DataType thres = one - m_perturb_probability;
-
-
-
-  // RNG
-  auto& gen = get_generator();
-  std::normal_distribution<DataType> norm(zero, one);
-  std::uniform_real_distribution<DataType> uni(zero,one);
-
-
-  for (auto* w : m.get_weights()) {
-    if (w == nullptr) {
-    	LBANN_ERROR("callback \"", name(), "\" "
-                  "got a weights pointer that is a null pointer");
-    }
-
-    // Check layer name
-    if(w->get_name() == m_output_name) {
-	auto& values = w->get_values();
-	auto& new_values = dynamic_cast<El::AbstractDistMatrix<DataType>&>(values);
-
-	auto& local_values = new_values.Matrix();
-	El::Matrix<DataType,El::Device::CPU> temp;
-	El::Copy(local_values, temp);
-
-	// Perturb weights on master process
-	if (comm->am_trainer_master()) {
-		for (auto i = 0; i < temp.Height(); i++){
-
-
-			// perturb
-			auto val = temp.Get(i,0);
-			auto perturbed_val = val;
-
-			if(uni(gen) > thres){
-				perturbed_val += norm(gen)*scale;
-				perturbed_val = std::min(std::max(perturbed_val, lower), upper);
-			}
-
-
-			temp.Set(i,0,perturbed_val);
-
-			El::Copy(temp, local_values);
-
-			std::cout << "Trainer [ " << m.get_comm()->get_trainer_rank() << " ], Step " << m.get_execution_context().get_step();
-			std::cout << " Weight " << i << ": " << val << " Perturbed weight  " <<  perturbed_val << std::endl;
-
-		}
-	}
-
-	// Communicate new weight from trainer master processes
-	El::Broadcast(new_values, comm->get_trainer_comm(), 0);
-
-	// Update weight
-	auto& out_w = dynamic_cast<data_type_weights<DataType>&>(*w);
-	out_w.set_values(new_values);
-
-	break;
-    }
-  }
-}
-
-
-std::unique_ptr<callback_base>
-build_perturb_weights_callback_from_pbuf(
-  const google::protobuf::Message& proto_msg, const std::shared_ptr<lbann_summary>&) {
-  const auto& params =
-    dynamic_cast<const lbann_data::Callback::CallbackPerturbWeights&>(proto_msg);
-  return make_unique<perturb_weights>(
-    params.upper(),
-    params.lower(),
-    params.scale(),
-    params.perturb_probability(),
-    params.output_name(),
-    params.batch_interval());
-}
-
-} // namespace callback
-} // namespace lbann
-
-#define LBANN_CLASS_NAME callback::perturb_weights
-#include <lbann/macros/register_class_with_cereal.hpp>
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory.
+// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
+// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
+//
+// LLNL-CODE-697807.
+// All rights reserved.
+//
+// This file is part of LBANN: Livermore Big Artificial Neural Network
+// Toolkit. For details, see http://software.llnl.gov/LBANN or
+// https://github.com/LLNL/LBANN.
+//
+// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
+// may not use this file except in compliance with the License.  You may
+// obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the license.
+////////////////////////////////////////////////////////////////////////////////
+
+#include "lbann/comm_impl.hpp"
+#include "lbann/callbacks/perturb_weights.hpp"
+#include "lbann/proto/proto_common.hpp"
+#include "lbann/utils/serialize.hpp"
+#include "lbann/weights/data_type_weights.hpp"
+
+#include <callbacks.pb.h>
+
+#include <algorithm>
+
+namespace lbann {
+namespace callback {
+
+perturb_weights::perturb_weights(
+  EvalType upper,
+  EvalType lower,
+  EvalType scale,
+  EvalType perturb_probability,
+  std::string output_name,
+  El::Int batch_interval)
+  : callback_base(batch_interval),
+    m_output_name(std::move(output_name)),
+    m_upper(upper),
+    m_lower(lower),
+    m_scale(scale),
+    m_perturb_probability(perturb_probability)
+{}
+
+perturb_weights::perturb_weights()
+  : perturb_weights(0,0,0,0,"",0)
+{}
+
+template <class Archive>
+void perturb_weights::serialize(Archive & ar) {
+  ar(::cereal::make_nvp(
+       "BaseCallback",
+       ::cereal::base_class<callback_base>(this)),
+     CEREAL_NVP(m_output_name),
+     CEREAL_NVP(m_upper),
+     CEREAL_NVP(m_lower),
+     CEREAL_NVP(m_scale),
+     CEREAL_NVP(m_perturb_probability));
+}
+
+void perturb_weights::setup(model* m) {
+   weights* m_output = nullptr;
+
+   for (auto* w : m->get_weights()) {
+      if(w->get_name() == m_output_name){
+        m_output = w;
+        break;
+      }
+   }
+    if (m_output == nullptr) {
+      LBANN_ERROR("Current implementation of callback \"", name(), "\" "
+                  "requires a weight object to perturb");
+    }
+}
+
+void perturb_weights::on_batch_begin(model* m) {
+  const auto& c = m->get_execution_context();
+  weights* m_output = nullptr;
+
+  for (auto* w : m->get_weights()) {
+      if(w->get_name() == m_output_name){
+        m_output = w;
+        break;
+      }
+   }
+
+  if (m_output != nullptr &&
+      c.get_step() % m_batch_interval == 0 &&
+      c.get_execution_mode() == execution_mode::training) {
+    perturb(*m);
+  }
+}
+
+void perturb_weights::perturb(model& m){
+
+  auto* comm = m.get_comm();
+
+  // Useful constants
+  constexpr DataType zero = 0;
+  constexpr DataType one = 1;
+  DataType lower = m_lower;
+  DataType upper = m_upper;
+  DataType scale = m_scale;
+  DataType thres = one - m_perturb_probability;
+
+
+
+  // RNG
+  auto& gen = get_generator();
+  std::normal_distribution<DataType> norm(zero, one);
+  std::uniform_real_distribution<DataType> uni(zero,one);
+
+
+  for (auto* w : m.get_weights()) {
+    if (w == nullptr) {
+    	LBANN_ERROR("callback \"", name(), "\" "
+                  "got a weights pointer that is a null pointer");
+    }
+
+    // Check layer name
+    if(w->get_name() == m_output_name) {
+	auto& values = w->get_values();
+	auto& new_values = dynamic_cast<El::AbstractDistMatrix<DataType>&>(values);
+
+	auto& local_values = new_values.Matrix();
+	El::Matrix<DataType,El::Device::CPU> temp;
+	El::Copy(local_values, temp);
+
+	// Perturb weights on master process
+	if (comm->am_trainer_master()) {
+		for (auto i = 0; i < temp.Height(); i++){
+
+
+			// perturb
+			auto val = temp.Get(i,0);
+			auto perturbed_val = val;
+
+			if(uni(gen) > thres){
+				perturbed_val += norm(gen)*scale;
+				perturbed_val = std::min(std::max(perturbed_val, lower), upper);
+			}
+
+
+			temp.Set(i,0,perturbed_val);
+
+			El::Copy(temp, local_values);
+
+			std::cout << "Trainer [ " << m.get_comm()->get_trainer_rank() << " ], Step " << m.get_execution_context().get_step();
+			std::cout << " Weight " << i << ": " << val << " Perturbed weight  " <<  perturbed_val << std::endl;
+
+		}
+	}
+
+	// Communicate new weight from trainer master processes
+	El::Broadcast(new_values, comm->get_trainer_comm(), 0);
+
+	// Update weight
+	auto& out_w = dynamic_cast<data_type_weights<DataType>&>(*w);
+	out_w.set_values(new_values);
+
+	break;
+    }
+  }
+}
+
+
+std::unique_ptr<callback_base>
+build_perturb_weights_callback_from_pbuf(
+  const google::protobuf::Message& proto_msg, const std::shared_ptr<lbann_summary>&) {
+  const auto& params =
+    dynamic_cast<const lbann_data::Callback::CallbackPerturbWeights&>(proto_msg);
+  return make_unique<perturb_weights>(
+    params.upper(),
+    params.lower(),
+    params.scale(),
+    params.perturb_probability(),
+    params.output_name(),
+    params.batch_interval());
+}
+
+} // namespace callback
+} // namespace lbann
+
+#define LBANN_CLASS_NAME callback::perturb_weights
+#include <lbann/macros/register_class_with_cereal.hpp>
diff --git a/src/callbacks/print_statistics.cpp b/src/callbacks/print_statistics.cpp
index afaa3ef8066..76e0d9ee2e1 100644
--- a/src/callbacks/print_statistics.cpp
+++ b/src/callbacks/print_statistics.cpp
@@ -185,7 +185,7 @@ void print_statistics::report_results(model *m) {
 
     auto& arg_parser = global_argument_parser();
     bool allow_global_statistics =
-      arg_parser.get<bool>(LTFB_ALLOW_GLOBAL_STATISTICS);
+      arg_parser.get<bool>(LBANN_OPTION_LTFB_ALLOW_GLOBAL_STATISTICS);
     std::stringstream report;
 
     // Report objective function value
diff --git a/src/callbacks/timer.cpp b/src/callbacks/timer.cpp
index 4816b0b77cd..cc156bd09e9 100644
--- a/src/callbacks/timer.cpp
+++ b/src/callbacks/timer.cpp
@@ -144,7 +144,7 @@ void timer::timing_end(model& m) {
 
     auto& arg_parser = global_argument_parser();
     bool allow_global_statistics =
-      arg_parser.get<bool>(LTFB_ALLOW_GLOBAL_STATISTICS);
+      arg_parser.get<bool>(LBANN_OPTION_LTFB_ALLOW_GLOBAL_STATISTICS);
     std::stringstream report;
 
     if(allow_global_statistics) {
diff --git a/src/data_coordinator/data_coordinator.cpp b/src/data_coordinator/data_coordinator.cpp
index 8b57915f908..2154046f792 100644
--- a/src/data_coordinator/data_coordinator.cpp
+++ b/src/data_coordinator/data_coordinator.cpp
@@ -71,10 +71,10 @@ void data_coordinator::setup(thread_pool& io_thread_pool, int max_mini_batch_siz
   }
 
   auto& arg_parser = global_argument_parser();
-  if (arg_parser.get<bool>(USE_DATA_STORE) ||
-      arg_parser.get<bool>(PRELOAD_DATA_STORE) ||
-      arg_parser.get<bool>(DATA_STORE_CACHE) ||
-      arg_parser.get<std::string>(DATA_STORE_SPILL) != "") {
+  if (arg_parser.get<bool>(LBANN_OPTION_USE_DATA_STORE) ||
+      arg_parser.get<bool>(LBANN_OPTION_PRELOAD_DATA_STORE) ||
+      arg_parser.get<bool>(LBANN_OPTION_DATA_STORE_CACHE) ||
+      arg_parser.get<std::string>(LBANN_OPTION_DATA_STORE_SPILL) != "") {
     bool master = m_comm->am_world_master();
     if (master) {
       std::cout << "\nUSING DATA STORE!\n\n";
diff --git a/src/data_readers/data_reader.cpp b/src/data_readers/data_reader.cpp
index c8723b6212e..fb9b3f2fdba 100644
--- a/src/data_readers/data_reader.cpp
+++ b/src/data_readers/data_reader.cpp
@@ -652,10 +652,10 @@ double generic_data_reader::get_use_percent() const {
 void generic_data_reader::instantiate_data_store() {
   double tm1 = get_time();
   auto& arg_parser = global_argument_parser();
-  if (!(arg_parser.get<bool>(USE_DATA_STORE) ||
-        arg_parser.get<bool>(PRELOAD_DATA_STORE) ||
-        arg_parser.get<bool>(DATA_STORE_CACHE) ||
-        arg_parser.get<std::string>(DATA_STORE_SPILL) != "")) {
+  if (!(arg_parser.get<bool>(LBANN_OPTION_USE_DATA_STORE) ||
+        arg_parser.get<bool>(LBANN_OPTION_PRELOAD_DATA_STORE) ||
+        arg_parser.get<bool>(LBANN_OPTION_DATA_STORE_CACHE) ||
+        arg_parser.get<std::string>(LBANN_OPTION_DATA_STORE_SPILL) != "")) {
     if (m_data_store != nullptr) {
       delete m_data_store;
       m_data_store = nullptr;
@@ -671,7 +671,7 @@ void generic_data_reader::instantiate_data_store() {
     LBANN_ERROR("shuffled_indices.size() == 0");
   }
 
-  if (arg_parser.get<bool>(NODE_SIZES_VARY)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_NODE_SIZES_VARY)) {
     m_data_store->set_node_sizes_vary();
   }
 
diff --git a/src/data_readers/data_reader_HDF5.cpp b/src/data_readers/data_reader_HDF5.cpp
index 43438d32db6..c7d31a12072 100644
--- a/src/data_readers/data_reader_HDF5.cpp
+++ b/src/data_readers/data_reader_HDF5.cpp
@@ -216,7 +216,7 @@ void hdf5_data_reader::load()
   double tm11 = tm1;
   auto& arg_parser = global_argument_parser();
 
-  if (arg_parser.get<bool>(KEEP_PACKED_FIELDS)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_KEEP_PACKED_FIELDS)) {
     m_delete_packed_fields = false;
   }
 
@@ -224,7 +224,7 @@ void hdf5_data_reader::load()
   // with data store
   // TODO MRW
   // opts->set_option("preload_data_store", true);
-  if (!arg_parser.get<bool>(USE_DATA_STORE)) {
+  if (!arg_parser.get<bool>(LBANN_OPTION_USE_DATA_STORE)) {
     LBANN_ERROR("HDF5 data reader requires the data store.",
                 "Set command line arguments --use_data_store --preload_data_store");
   }
diff --git a/src/data_readers/data_reader_hdf5_legacy.cpp b/src/data_readers/data_reader_hdf5_legacy.cpp
index 0f76cf3097a..41326ca1dd1 100644
--- a/src/data_readers/data_reader_hdf5_legacy.cpp
+++ b/src/data_readers/data_reader_hdf5_legacy.cpp
@@ -60,7 +60,7 @@ hdf5_reader<TensorDataType>::hdf5_reader(const bool shuffle,
                                          const std::string key_responses,
                                          const bool hyperslab_labels)
   : generic_data_reader(shuffle),
-    m_use_data_store(global_argument_parser().get<bool>(USE_DATA_STORE)),
+    m_use_data_store(global_argument_parser().get<bool>(LBANN_OPTION_USE_DATA_STORE)),
     m_key_data(key_data),
     m_key_labels(key_labels),
     m_key_responses(key_responses),
@@ -230,7 +230,7 @@ void hdf5_reader<TensorDataType>::load() {
 #endif
   std::vector<int> local_list_sizes;
   auto& arg_parser = global_argument_parser();
-  if (arg_parser.get<bool>(PRELOAD_DATA_STORE)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_PRELOAD_DATA_STORE)) {
     LBANN_ERROR("preload_data_store not supported on HDF5 data reader");
   }
   if (m_use_data_store) {
diff --git a/src/data_readers/data_reader_image.cpp b/src/data_readers/data_reader_image.cpp
index f2636abab76..363ddfbc5c6 100644
--- a/src/data_readers/data_reader_image.cpp
+++ b/src/data_readers/data_reader_image.cpp
@@ -165,7 +165,7 @@ void image_data_reader::load() {
     load_list_of_samples(sample_list_file);
   }
 
-  if (arg_parser.get<bool>(WRITE_SAMPLE_LIST) && m_comm->am_trainer_master()) {
+  if (arg_parser.get<bool>(LBANN_OPTION_WRITE_SAMPLE_LIST) && m_comm->am_trainer_master()) {
     const std::string slist_name = (m_sample_list.get_header()).get_sample_list_name();
     std::stringstream s;
     std::string basename = get_basename_without_ext(slist_name);
@@ -180,7 +180,7 @@ void image_data_reader::load() {
   }
   if (arg_parser.get<bool>("write_sample_label_list") &&
       m_comm->am_trainer_master()) {
-    if (!(m_keep_sample_order || arg_parser.get<bool>(KEEP_SAMPLE_ORDER))) {
+    if (!(m_keep_sample_order || arg_parser.get<bool>(LBANN_OPTION_KEEP_SAMPLE_ORDER))) {
       std::cout << "Writting sample label list without the option "
                 << "`keep_sample_order' set." << std::endl;
     }
@@ -232,7 +232,7 @@ void image_data_reader::do_preload_data_store() {
 
   int rank = m_comm->get_rank_in_trainer();
 
-  bool threaded = !arg_parser.get<bool>(DATA_STORE_NO_THREAD);
+  bool threaded = !arg_parser.get<bool>(LBANN_OPTION_DATA_STORE_NO_THREAD);
   if (threaded) {
     if (get_comm()->am_world_master()) {
       std::cout << "mode: data_store_thread\n";
@@ -333,20 +333,20 @@ void image_data_reader::load_list_of_samples(const std::string sample_list_file)
 
   auto& arg_parser = global_argument_parser();
 
-  if (m_keep_sample_order || arg_parser.get<bool>(KEEP_SAMPLE_ORDER)) {
+  if (m_keep_sample_order || arg_parser.get<bool>(LBANN_OPTION_KEEP_SAMPLE_ORDER)) {
     m_sample_list.keep_sample_order(true);
   }
   else {
     m_sample_list.keep_sample_order(false);
   }
 
-  if (arg_parser.get<bool>(CHECK_DATA)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_CHECK_DATA)) {
     m_sample_list.set_data_file_check();
   }
 
   std::vector<char> buffer;
 
-  if (arg_parser.get<bool>(LOAD_FULL_SAMPLE_LIST_ONCE)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_LOAD_FULL_SAMPLE_LIST_ONCE)) {
     if (m_comm->am_trainer_master()) {
       load_file(sample_list_file, buffer);
     }
@@ -426,20 +426,20 @@ void image_data_reader::gen_list_of_samples() {
 
   auto& arg_parser = global_argument_parser();
 
-  if (m_keep_sample_order || arg_parser.get<bool>(KEEP_SAMPLE_ORDER)) {
+  if (m_keep_sample_order || arg_parser.get<bool>(LBANN_OPTION_KEEP_SAMPLE_ORDER)) {
     m_sample_list.keep_sample_order(true);
   }
   else {
     m_sample_list.keep_sample_order(false);
   }
 
-  if (arg_parser.get<bool>(CHECK_DATA)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_CHECK_DATA)) {
     m_sample_list.set_data_file_check();
   }
 
   std::vector<char> buffer;
 
-  if (arg_parser.get<bool>(LOAD_FULL_SAMPLE_LIST_ONCE)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_LOAD_FULL_SAMPLE_LIST_ONCE)) {
     // The trainer master loads the entire file into a buffer in the memory
     if (m_comm->am_trainer_master()) {
       load_file(imageListFile, buffer);
@@ -509,7 +509,7 @@ void image_data_reader::read_labels(std::istream& istrm) {
   m_sample_list.build_sample_map_from_name_to_index();
 
   auto& arg_parser = global_argument_parser();
-  const bool check_data = arg_parser.get<bool>(CHECK_DATA);
+  const bool check_data = arg_parser.get<bool>(LBANN_OPTION_CHECK_DATA);
 
   m_labels.clear();
   m_labels.resize(num_samples);
diff --git a/src/data_readers/data_reader_jag_conduit.cpp b/src/data_readers/data_reader_jag_conduit.cpp
index 48cec4f88d0..e3a75a52d7f 100644
--- a/src/data_readers/data_reader_jag_conduit.cpp
+++ b/src/data_readers/data_reader_jag_conduit.cpp
@@ -764,7 +764,7 @@ void data_reader_jag_conduit::load() {
   load_list_of_samples(sample_list_file);
 
   auto& arg_parser = global_argument_parser();
-  if (arg_parser.get<bool>(WRITE_SAMPLE_LIST) && m_comm->am_trainer_master()) {
+  if (arg_parser.get<bool>(LBANN_OPTION_WRITE_SAMPLE_LIST) && m_comm->am_trainer_master()) {
     {
       const std::string msg = " writing sample list " + sample_list_file;
       LBANN_WARNING(msg);
@@ -798,7 +798,7 @@ void data_reader_jag_conduit::do_preload_data_store() {
   auto& arg_parser = global_argument_parser();
   double tm1 = get_time();
   if (get_comm()->am_world_master() ||
-      (arg_parser.get<bool>(LTFB_VERBOSE) && get_comm()->am_trainer_master())) {
+      (arg_parser.get<bool>(LBANN_OPTION_LTFB_VERBOSE) && get_comm()->am_trainer_master())) {
     LBANN_WARNING("starting preload for role: ", get_role());
   }
 
@@ -836,7 +836,7 @@ void data_reader_jag_conduit::do_preload_data_store() {
   }
 
   if (get_comm()->am_world_master() ||
-      (arg_parser.get<bool>(LTFB_VERBOSE) && get_comm()->am_trainer_master())) {
+      (arg_parser.get<bool>(LBANN_OPTION_LTFB_VERBOSE) && get_comm()->am_trainer_master())) {
     std::stringstream msg;
     msg << " loading data for role: " << get_role() << " took " << get_time() - tm1 << "s";
     LBANN_WARNING(msg.str());
@@ -879,14 +879,14 @@ void data_reader_jag_conduit::load_list_of_samples(const std::string sample_list
 
   auto& arg_parser = global_argument_parser();
 
-  if (this->m_keep_sample_order || arg_parser.get<bool>(KEEP_SAMPLE_ORDER)) {
+  if (this->m_keep_sample_order || arg_parser.get<bool>(LBANN_OPTION_KEEP_SAMPLE_ORDER)) {
     m_sample_list.keep_sample_order(true);
   }
   else {
     m_sample_list.keep_sample_order(false);
   }
 
-  const bool check_data = arg_parser.get<bool>(CHECK_DATA);
+  const bool check_data = arg_parser.get<bool>(LBANN_OPTION_CHECK_DATA);
 
   if (check_data) {
     m_sample_list.set_data_file_check();
@@ -894,7 +894,7 @@ void data_reader_jag_conduit::load_list_of_samples(const std::string sample_list
 
   std::vector<char> buffer;
 
-  if (arg_parser.get<bool>(LOAD_FULL_SAMPLE_LIST_ONCE)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_LOAD_FULL_SAMPLE_LIST_ONCE)) {
     if (m_comm->am_trainer_master()) {
       load_file(sample_list_file, buffer);
     }
diff --git a/src/data_readers/data_reader_npz_ras_lipid.cpp b/src/data_readers/data_reader_npz_ras_lipid.cpp
index 66a789c302f..25a57de8f6b 100644
--- a/src/data_readers/data_reader_npz_ras_lipid.cpp
+++ b/src/data_readers/data_reader_npz_ras_lipid.cpp
@@ -118,8 +118,8 @@ void ras_lipid_conduit_data_reader::load() {
 
   // Get the number of samples that will be combined into a multi-sample
   m_seq_len = 1;
-  if (arg_parser.get<int>(SEQUENCE_LENGTH) != -1) {
-    m_seq_len = arg_parser.get<int>(SEQUENCE_LENGTH);
+  if (arg_parser.get<int>(LBANN_OPTION_SEQUENCE_LENGTH) != -1) {
+    m_seq_len = arg_parser.get<int>(LBANN_OPTION_SEQUENCE_LENGTH);
   }
 
   // set the number of labels
@@ -224,14 +224,14 @@ data types, from python+numpy:
 
   // Variables only used for user feedback
   auto& arg_parser = global_argument_parser();
-  bool verbose = arg_parser.get<bool>(VERBOSE);
+  bool verbose = arg_parser.get<bool>(LBANN_OPTION_VERBOSE);
   int np = m_comm->get_procs_per_trainer();
   size_t nn = 0;
 
   std::vector<conduit::Node> work(m_seq_len);
 
   // option and variables only used for testing during development
-  bool debug_concatenate = arg_parser.get<bool>(DEBUG_CONCATENATE);
+  bool debug_concatenate = arg_parser.get<bool>(LBANN_OPTION_DEBUG_CONCATENATE);
   if (m_seq_len > 1) {
     debug_concatenate = false;
   }
@@ -444,7 +444,7 @@ void ras_lipid_conduit_data_reader::write_file_sizes() {
     return;
   }
   std::string fn =
-    global_argument_parser().get<std::string>(PILOT2_SAVE_FILE_SIZES);
+    global_argument_parser().get<std::string>(LBANN_OPTION_PILOT2_SAVE_FILE_SIZES);
   std::ofstream out(fn.c_str());
   if (!out) {
     LBANN_ERROR("failed to open ", fn, " for writing");
@@ -457,7 +457,7 @@ void ras_lipid_conduit_data_reader::write_file_sizes() {
 
 void ras_lipid_conduit_data_reader::read_file_sizes() {
   std::string fn =
-    global_argument_parser().get<std::string>(PILOT2_READ_FILE_SIZES);
+    global_argument_parser().get<std::string>(LBANN_OPTION_PILOT2_READ_FILE_SIZES);
   std::ifstream in(fn.c_str());
   if (!in) {
     LBANN_ERROR("failed to open ", fn, " for reading");
@@ -483,9 +483,9 @@ void ras_lipid_conduit_data_reader::read_normalization_data() {
   m_use_min_max = false;
   m_use_z_score = false;
   auto& arg_parser = global_argument_parser();
-  if (arg_parser.get<std::string>(NORMALIZATION) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_NORMALIZATION) != "") {
     m_use_min_max = true;
-    m_use_z_score = arg_parser.get<bool>(Z_SCORE);
+    m_use_z_score = arg_parser.get<bool>(LBANN_OPTION_Z_SCORE);
     if (get_comm()->am_world_master()) {
       if (m_use_z_score) {
         std::cout << "Normalizing data using z-score" << std::endl;
@@ -494,7 +494,7 @@ void ras_lipid_conduit_data_reader::read_normalization_data() {
       }
     }
 
-    std::string fn = arg_parser.get<std::string>(NORMALIZATION);
+    std::string fn = arg_parser.get<std::string>(LBANN_OPTION_NORMALIZATION);
     std::ifstream in(fn.c_str());
     if (!in) {
       LBANN_ERROR("failed to open ", fn, " for reading");
@@ -546,7 +546,7 @@ void ras_lipid_conduit_data_reader::print_shapes_etc() {
   }
   std::cout << std::endl;
 
-  if (global_argument_parser().get<bool>(VERBOSE)) {
+  if (global_argument_parser().get<bool>(LBANN_OPTION_VERBOSE)) {
     std::cout << "\nAll data shapes:\n";
     for (const auto &t : m_datum_shapes) {
       std::cout << "  " << t.first << " ";
diff --git a/src/data_readers/data_reader_numpy_npz_conduit.cpp b/src/data_readers/data_reader_numpy_npz_conduit.cpp
index 8f2689d95f5..12c2cfadad3 100644
--- a/src/data_readers/data_reader_numpy_npz_conduit.cpp
+++ b/src/data_readers/data_reader_numpy_npz_conduit.cpp
@@ -79,8 +79,8 @@ void numpy_npz_conduit_reader::load() {
 
   auto& arg_parser = global_argument_parser();
 
-  if (!(arg_parser.get<bool>(PRELOAD_DATA_STORE) ||
-        arg_parser.get<bool>(USE_DATA_STORE))) {
+  if (!(arg_parser.get<bool>(LBANN_OPTION_PRELOAD_DATA_STORE) ||
+        arg_parser.get<bool>(LBANN_OPTION_USE_DATA_STORE))) {
     LBANN_ERROR("numpy_npz_conduit_reader requires data_store; please pass either --use_data_store or --preload_data_store on the cmd line");
   }
 
@@ -104,8 +104,8 @@ void numpy_npz_conduit_reader::load() {
   resize_shuffled_indices();
   m_num_samples = m_shuffled_indices.size();
 
-  if (m_num_labels == 0 && !arg_parser.get<bool>(PRELOAD_DATA_STORE) &&
-      arg_parser.get<bool>(USE_DATA_STORE)) {
+  if (m_num_labels == 0 && !arg_parser.get<bool>(LBANN_OPTION_PRELOAD_DATA_STORE) &&
+      arg_parser.get<bool>(LBANN_OPTION_USE_DATA_STORE)) {
     LBANN_WARNING("when not preloading you must specify the number of labels in the prototext file if you are doing classification");
   }
 
@@ -129,7 +129,7 @@ void numpy_npz_conduit_reader::do_preload_data_store() {
 
   std::unordered_set<int> label_classes;
 
-  bool threaded = !global_argument_parser().get<bool>(DATA_STORE_NO_THREAD);
+  bool threaded = !global_argument_parser().get<bool>(LBANN_OPTION_DATA_STORE_NO_THREAD);
 
   //threaded mode
   if (threaded) {
diff --git a/src/data_readers/data_reader_smiles.cpp b/src/data_readers/data_reader_smiles.cpp
index 2564f54ac49..9fe53a6eb77 100644
--- a/src/data_readers/data_reader_smiles.cpp
+++ b/src/data_readers/data_reader_smiles.cpp
@@ -119,19 +119,19 @@ void smiles_data_reader::load() {
   set_use_data_store(true);
 
   if (m_sequence_length == 0) {
-    if (arg_parser.get<int>(SEQUENCE_LENGTH) == -1) {
+    if (arg_parser.get<int>(LBANN_OPTION_SEQUENCE_LENGTH) == -1) {
       LBANN_ERROR("you must pass --sequence_length=<int> on the cmd line or call set_sequence_length()");
     }
-    m_sequence_length = arg_parser.get<int>(SEQUENCE_LENGTH);
+    m_sequence_length = arg_parser.get<int>(LBANN_OPTION_SEQUENCE_LENGTH);
   }
   m_linearized_data_size = m_sequence_length+2;
 
   // load the vocabulary; this is a map: string -> short
   if (m_vocab.size() == 0) {
-    if (arg_parser.get<std::string>(VOCAB) == "") {
+    if (arg_parser.get<std::string>(LBANN_OPTION_VOCAB) == "") {
       LBANN_ERROR("you must either pass --vocab=<string> on the command line or call load_vocab(...)");
     }
-    const std::string fn = arg_parser.get<std::string>(VOCAB);
+    const std::string fn = arg_parser.get<std::string>(LBANN_OPTION_VOCAB);
     load_vocab(fn);
   } else {
     LBANN_ERROR("you passed --vocab=<string>, but it looks like load_vocab() was previously called. You must use one or the other.");
@@ -181,7 +181,7 @@ void smiles_data_reader::do_preload_data_store() {
   for (const auto &filename : my_ordering) {
     std::ifstream in;
     auto& arg_parser = global_argument_parser();
-    size_t buf_size = arg_parser.get<size_t>(SMILES_BUFFER_SIZE);
+    size_t buf_size = arg_parser.get<size_t>(LBANN_OPTION_SMILES_BUFFER_SIZE);
     char iobuffer [buf_size];
     in.rdbuf()->pubsetbuf(iobuffer,buf_size);
     in.open(filename.c_str(), std::ios::binary | std::ios::ate);
@@ -589,7 +589,7 @@ void smiles_data_reader::read_offset_data(std::vector<SampleData> &data) {
     if (m_filename_to_local_id_set.find(data_filenames[j]) != m_filename_to_local_id_set.end()) {
       std::ifstream in;
       auto& arg_parser = global_argument_parser();
-      size_t buf_size = arg_parser.get<size_t>(SMILES_BUFFER_SIZE);
+      size_t buf_size = arg_parser.get<size_t>(LBANN_OPTION_SMILES_BUFFER_SIZE);
       char iobuffer [buf_size];
       in.rdbuf()->pubsetbuf(iobuffer,buf_size);
       in.open(offsets_filenames[j], std::ios::binary);
diff --git a/src/data_store/data_store_conduit.cpp b/src/data_store/data_store_conduit.cpp
index c71358d180b..b7aa2d96aa3 100644
--- a/src/data_store/data_store_conduit.cpp
+++ b/src/data_store/data_store_conduit.cpp
@@ -80,23 +80,23 @@ data_store_conduit::data_store_conduit(
   auto& arg_parser = global_argument_parser();
 
   // For use in testing
-  if (arg_parser.get<bool>(DATA_STORE_FAIL)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_DATA_STORE_FAIL)) {
     LBANN_ERROR("data_store_conduit is throwing a fake exception; this is for use during testing");
   }
 
-  if (arg_parser.get<std::string>(DATA_STORE_TEST_CHECKPOINT) != "" &&
-      arg_parser.get<std::string>(DATA_STORE_SPILL) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_DATA_STORE_TEST_CHECKPOINT) != "" &&
+      arg_parser.get<std::string>(LBANN_OPTION_DATA_STORE_SPILL) != "") {
     LBANN_ERROR("you passed both --data_store_test_checkpoint and --data_store_spill; please use one or the other or none, but not both");
   }
-  if (arg_parser.get<std::string>(DATA_STORE_TEST_CHECKPOINT) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_DATA_STORE_TEST_CHECKPOINT) != "") {
     setup_checkpoint_test();
   }
-  if (arg_parser.get<std::string>(DATA_STORE_SPILL) != "") {
-    setup_spill(arg_parser.get<std::string>(DATA_STORE_SPILL));
+  if (arg_parser.get<std::string>(LBANN_OPTION_DATA_STORE_SPILL) != "") {
+    setup_spill(arg_parser.get<std::string>(LBANN_OPTION_DATA_STORE_SPILL));
   }
 
-  set_is_local_cache(arg_parser.get<bool>(DATA_STORE_CACHE));
-  set_is_preloading(arg_parser.get<bool>(PRELOAD_DATA_STORE));
+  set_is_local_cache(arg_parser.get<bool>(LBANN_OPTION_DATA_STORE_CACHE));
+  set_is_preloading(arg_parser.get<bool>(LBANN_OPTION_PRELOAD_DATA_STORE));
   set_is_explicitly_loading(! is_preloading());
 
   if (is_local_cache()) {
@@ -134,7 +134,7 @@ data_store_conduit::~data_store_conduit() {
 
 void data_store_conduit::setup_checkpoint_test() {
   auto& arg_parser = global_argument_parser();
-  std::string c = arg_parser.get<std::string>(DATA_STORE_TEST_CHECKPOINT);
+  std::string c = arg_parser.get<std::string>(LBANN_OPTION_DATA_STORE_TEST_CHECKPOINT);
   // TODO MRW
   if (c == "1") {
     LBANN_ERROR("--data_store_test_checkpoint=1; you probably forgot to specify the spill directory; you must specify --data_store_test_checkpoint=<string>'");
@@ -983,7 +983,7 @@ bool data_store_conduit::is_fully_loaded() const {
 void data_store_conduit::get_image_sizes(map_is_t &file_sizes, std::vector<std::vector<int>> &indices) {
   auto& arg_parser = global_argument_parser();
   /// this block fires if image sizes have been precomputed
-  if (arg_parser.get<std::string>(IMAGE_SIZES_FILENAME) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_IMAGE_SIZES_FILENAME) != "") {
     LBANN_ERROR("not yet implemented");
     //TODO dah - implement, if this becomes a bottleneck (but I don't think it will)
   }
@@ -1200,7 +1200,7 @@ void data_store_conduit::exchange_local_caches() {
   set_loading_is_complete();
 
   auto& arg_parser = global_argument_parser();
-  if (arg_parser.get<bool>(DATA_STORE_TEST_CACHE)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_DATA_STORE_TEST_CACHE)) {
     test_local_cache_imagenet(20);
   }
 }
@@ -1436,7 +1436,7 @@ void data_store_conduit::profile_timing() {
         "  wait alls:                ", m_wait_all_time, "\n",
         "  unpacking rcvd nodes:     ", m_rebuild_time, "\n\n");
 
-    if (arg_parser.get<bool>(DATA_STORE_MIN_MAX_TIMING)) {
+    if (arg_parser.get<bool>(LBANN_OPTION_DATA_STORE_MIN_MAX_TIMING)) {
       std::vector<double> send;
       static int count = 5;
       send.reserve(count);
@@ -1867,7 +1867,7 @@ void data_store_conduit::open_informational_files() {
   }
 
   // optionally, each <rank, reader_role> pair opens a debug file
-  if (arg_parser.get<bool>(DATA_STORE_DEBUG) && !m_debug &&
+  if (arg_parser.get<bool>(LBANN_OPTION_DATA_STORE_DEBUG) && !m_debug &&
       m_reader != nullptr) {
     m_debug_filename = m_debug_filename_base + "_" + m_reader->get_role() + "." + std::to_string(m_comm->get_rank_in_world()) + ".txt";
     m_debug = new std::ofstream(m_debug_filename.c_str());
@@ -1877,7 +1877,7 @@ void data_store_conduit::open_informational_files() {
   }
 
   // optionally, <P_0, reader_role> pair opens a file for writing
-  if (arg_parser.get<bool>(DATA_STORE_PROFILE) && m_world_master &&
+  if (arg_parser.get<bool>(LBANN_OPTION_DATA_STORE_PROFILE) && m_world_master &&
       !m_profile && m_reader != nullptr) {
     m_profile_filename = m_profile_filename_base + "_" + m_reader->get_role() + ".txt";
     m_profile = new std::ofstream(m_profile_filename.c_str());
diff --git a/src/layers/data_type_layer.cpp b/src/layers/data_type_layer.cpp
index 1721ff22a37..85e4ca515d6 100644
--- a/src/layers/data_type_layer.cpp
+++ b/src/layers/data_type_layer.cpp
@@ -689,7 +689,7 @@ setup_matrices(const El::Grid& grid) {
   /// training with persistent error signals
   if (this->get_device_allocation() == El::Device::GPU) {
     const auto& arg_parser = global_argument_parser();
-    if (!arg_parser.get<bool>(USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
+    if (!arg_parser.get<bool>(LBANN_OPTION_USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
       for (auto& input : m_inputs) {
         input->Matrix().SetMemoryMode(0); // Directly-allocated memory
       }
diff --git a/src/optimizers/adam.cpp b/src/optimizers/adam.cpp
index 73b5eb7f85a..6a8922c548a 100644
--- a/src/optimizers/adam.cpp
+++ b/src/optimizers/adam.cpp
@@ -113,7 +113,7 @@ void adam<TensorDataType>::setup(WeightsType* w) {
   if (m_moment1->GetLocalDevice() == El::Device::GPU
       && m_moment2->GetLocalDevice() == El::Device::GPU) {
     const auto& arg_parser = global_argument_parser();
-    if (!arg_parser.get<bool>(USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
+    if (!arg_parser.get<bool>(LBANN_OPTION_USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
       m_moment1->Matrix().SetMemoryMode(0); // Directly-allocated memory
       m_moment2->Matrix().SetMemoryMode(0); // Directly-allocated memory
     }
diff --git a/src/optimizers/sgd.cpp b/src/optimizers/sgd.cpp
index 2c71086775b..22a40519d90 100644
--- a/src/optimizers/sgd.cpp
+++ b/src/optimizers/sgd.cpp
@@ -87,7 +87,7 @@ void sgd<TensorDataType>::setup(WeightsType* w) {
 #ifdef LBANN_HAS_GPU
   if (m_velocity->GetLocalDevice() == El::Device::GPU) {
     const auto& arg_parser = global_argument_parser();
-    if (!arg_parser.get<bool>(USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
+    if (!arg_parser.get<bool>(LBANN_OPTION_USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
       m_velocity->Matrix().SetMemoryMode(0); // Directly-allocated memory
     }
   }
diff --git a/src/proto/factories/layer_factory.cpp b/src/proto/factories/layer_factory.cpp
index f8ed69128b5..72a5ef232e1 100644
--- a/src/proto/factories/layer_factory.cpp
+++ b/src/proto/factories/layer_factory.cpp
@@ -272,7 +272,7 @@ std::unique_ptr<Layer> construct_layer_legacy(
 
   // Input layers
   // Currently this cannot be suitably removed from this function
-  // because it relies on NUM_PARALLEL_READERS and "data_readers"
+  // because it relies on LBANN_OPTION_NUM_PARALLEL_READERS and "data_readers"
   // arguments.
   if (proto_layer.has_input()) {
     const auto& params = proto_layer.input();
@@ -297,7 +297,7 @@ std::unique_ptr<Layer> construct_layer_legacy(
   }
 
   // Currently this cannot be suitably removed from this function
-  // because it relies on NUM_PARALLEL_READERS and "data_readers"
+  // because it relies on LBANN_OPTION_NUM_PARALLEL_READERS and "data_readers"
   // arguments.
   if (proto_layer.has_deconvolution()) {
     const auto& params = proto_layer.deconvolution();
@@ -364,7 +364,7 @@ std::unique_ptr<Layer> construct_layer_legacy(
 
   // Transform layers
   // Currently this cannot be suitably removed from this function
-  // because it relies on NUM_PARALLEL_READERS and "data_readers"
+  // because it relies on LBANN_OPTION_NUM_PARALLEL_READERS and "data_readers"
   // arguments.
   if (proto_layer.has_reshape()) {
     const auto& params = proto_layer.reshape();
@@ -383,7 +383,7 @@ std::unique_ptr<Layer> construct_layer_legacy(
   }
 
   // Currently this cannot be suitably removed from this function
-  // because it relies on NUM_PARALLEL_READERS and "data_readers"
+  // because it relies on LBANN_OPTION_NUM_PARALLEL_READERS and "data_readers"
   // arguments.
   if (proto_layer.has_slice()) {
     const auto& params = proto_layer.slice();
diff --git a/src/proto/proto_common.cpp b/src/proto/proto_common.cpp
index 16addbd1ee8..d449dd41d7d 100644
--- a/src/proto/proto_common.cpp
+++ b/src/proto/proto_common.cpp
@@ -417,9 +417,9 @@ void init_data_readers(
       reader->set_local_file_dir( readme.data_local_filedir() );
     }
 
-    if (arg_parser.get<bool>(CREATE_TARBALL)) {
-      if (arg_parser.get<int>(TEST_TARBALL) != -1) {
-        reader->set_absolute_sample_count(arg_parser.get<int>(TEST_TARBALL));
+    if (arg_parser.get<bool>(LBANN_OPTION_CREATE_TARBALL)) {
+      if (arg_parser.get<int>(LBANN_OPTION_TEST_TARBALL) != -1) {
+        reader->set_absolute_sample_count(arg_parser.get<int>(LBANN_OPTION_TEST_TARBALL));
         reader->set_use_percent( 0. );
         reader->set_first_n(0);
       }
@@ -450,13 +450,13 @@ void init_data_readers(
       reader->set_role("error");
     }
     if (readme.role() == "train") {
-      if (arg_parser.get<bool>(CREATE_TARBALL) || separate_validation) {
+      if (arg_parser.get<bool>(LBANN_OPTION_CREATE_TARBALL) || separate_validation) {
         reader->set_execution_mode_split_percent(execution_mode::validation, 0. );
       }
       else {
         reader->set_execution_mode_split_percent(execution_mode::validation, readme.validation_percent() );
       }
-      if (arg_parser.get<bool>(CREATE_TARBALL) || separate_tournament) {
+      if (arg_parser.get<bool>(LBANN_OPTION_CREATE_TARBALL) || separate_tournament) {
         reader->set_execution_mode_split_percent(execution_mode::tournament, 0. );
       }
       else {
@@ -480,7 +480,7 @@ void init_data_readers(
       data_readers[execution_mode::tournament] = reader;
     }
 
-    if (readme.role() == "train" && !arg_parser.get<bool>(CREATE_TARBALL)) {
+    if (readme.role() == "train" && !arg_parser.get<bool>(LBANN_OPTION_CREATE_TARBALL)) {
       for(auto m : execution_mode_iterator()) {
         if((m == execution_mode::validation && readme.validation_percent() > 0. && !separate_validation)
            || (m == execution_mode::tournament && readme.tournament_percent() > 0. && !separate_tournament)) {
@@ -767,7 +767,7 @@ void set_data_readers_sample_list(
 void set_data_readers_percent(lbann_data::LbannPB& p)
 {
   auto& arg_parser = global_argument_parser();
-  double percent = arg_parser.get<float>(DATA_READER_PERCENT);
+  double percent = arg_parser.get<float>(LBANN_OPTION_DATA_READER_PERCENT);
   if (percent <= 0 || percent > 1.0) {
       std::ostringstream err;
       err << __FILE__ << " " << __LINE__ << " :: "
@@ -823,46 +823,46 @@ void get_cmdline_overrides(const lbann_comm& comm, lbann_data::LbannPB& p)
   lbann_data::DataReader *d_reader = p.mutable_data_reader();
   int size = d_reader->reader_size();
 
-  if (arg_parser.get<int>(ABSOLUTE_SAMPLE_COUNT) != -1) {
+  if (arg_parser.get<int>(LBANN_OPTION_ABSOLUTE_SAMPLE_COUNT) != -1) {
     for (int j=0; j<size; j++) {
-      int n = arg_parser.get<int>(ABSOLUTE_SAMPLE_COUNT);
+      int n = arg_parser.get<int>(LBANN_OPTION_ABSOLUTE_SAMPLE_COUNT);
       lbann_data::Reader *readme = d_reader->mutable_reader(j);
       readme->set_percent_of_data_to_use(0.0);
       readme->set_absolute_sample_count(n);
     }
   }
 
-  if ((arg_parser.get<std::string>(DATA_FILEDIR) != "") or
-      (arg_parser.get<std::string>(DATA_FILEDIR_TRAIN) != "") or
-      (arg_parser.get<std::string>(DATA_FILENAME_TRAIN) != "") or
-      (arg_parser.get<std::string>(LABEL_FILENAME_TRAIN) != "")) {
+  if ((arg_parser.get<std::string>(LBANN_OPTION_DATA_FILEDIR) != "") or
+      (arg_parser.get<std::string>(LBANN_OPTION_DATA_FILEDIR_TRAIN) != "") or
+      (arg_parser.get<std::string>(LBANN_OPTION_DATA_FILENAME_TRAIN) != "") or
+      (arg_parser.get<std::string>(LBANN_OPTION_LABEL_FILENAME_TRAIN) != "")) {
     set_data_readers_filenames("train", p);
   }
-  if ((arg_parser.get<std::string>(DATA_FILEDIR) != "") or
-      (arg_parser.get<std::string>(DATA_FILEDIR_VALIDATE) != "") or
-      (arg_parser.get<std::string>(DATA_FILENAME_VALIDATE) != "") or
-      (arg_parser.get<std::string>(LABEL_FILENAME_VALIDATE) != "")) {
+  if ((arg_parser.get<std::string>(LBANN_OPTION_DATA_FILEDIR) != "") or
+      (arg_parser.get<std::string>(LBANN_OPTION_DATA_FILEDIR_VALIDATE) != "") or
+      (arg_parser.get<std::string>(LBANN_OPTION_DATA_FILENAME_VALIDATE) != "") or
+      (arg_parser.get<std::string>(LBANN_OPTION_LABEL_FILENAME_VALIDATE) != "")) {
     set_data_readers_filenames("validate", p);
   }
-  if ((arg_parser.get<std::string>(DATA_FILEDIR) != "") or
-      (arg_parser.get<std::string>(DATA_FILEDIR_TEST) != "") or
-      (arg_parser.get<std::string>(DATA_FILENAME_TEST) != "") or
-      (arg_parser.get<std::string>(LABEL_FILENAME_TEST) != "")) {
+  if ((arg_parser.get<std::string>(LBANN_OPTION_DATA_FILEDIR) != "") or
+      (arg_parser.get<std::string>(LBANN_OPTION_DATA_FILEDIR_TEST) != "") or
+      (arg_parser.get<std::string>(LBANN_OPTION_DATA_FILENAME_TEST) != "") or
+      (arg_parser.get<std::string>(LBANN_OPTION_LABEL_FILENAME_TEST) != "")) {
     set_data_readers_filenames("test", p);
   }
-  if (arg_parser.get<std::string>(SAMPLE_LIST_TRAIN) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_SAMPLE_LIST_TRAIN) != "") {
     set_data_readers_sample_list("train", p);
   }
-  if (arg_parser.get<std::string>(SAMPLE_LIST_VALIDATE) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_SAMPLE_LIST_VALIDATE) != "") {
     set_data_readers_sample_list("validate", p);
   }
-  if (arg_parser.get<std::string>(SAMPLE_LIST_TEST) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_SAMPLE_LIST_TEST) != "") {
     set_data_readers_sample_list("test", p);
   }
-  if (arg_parser.get<float>(DATA_READER_PERCENT) != -1.0) {
+  if (arg_parser.get<float>(LBANN_OPTION_DATA_READER_PERCENT) != -1.0) {
     set_data_readers_percent(p);
   }
-  if (arg_parser.get<bool>(NO_IM_COMM)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_NO_IM_COMM)) {
     int sz = model->callback_size();
     for (int j=0; j<sz; j++) {
       lbann_data::Callback *c = model->mutable_callback(j);
@@ -871,27 +871,27 @@ void get_cmdline_overrides(const lbann_comm& comm, lbann_data::LbannPB& p)
       }
     }
   }
-  if (arg_parser.get<int>(MINI_BATCH_SIZE) != -1) {
-    trainer->set_mini_batch_size(arg_parser.get<int>(MINI_BATCH_SIZE));
+  if (arg_parser.get<int>(LBANN_OPTION_MINI_BATCH_SIZE) != -1) {
+    trainer->set_mini_batch_size(arg_parser.get<int>(LBANN_OPTION_MINI_BATCH_SIZE));
   }
-  if (arg_parser.get<int>(NUM_EPOCHS) != -1) {
-    model->set_num_epochs(arg_parser.get<int>(NUM_EPOCHS));
+  if (arg_parser.get<int>(LBANN_OPTION_NUM_EPOCHS) != -1) {
+    model->set_num_epochs(arg_parser.get<int>(LBANN_OPTION_NUM_EPOCHS));
   }
-  if (arg_parser.get<int>(HYDROGEN_BLOCK_SIZE) != -1) {
-    trainer->set_hydrogen_block_size(arg_parser.get<int>(HYDROGEN_BLOCK_SIZE));
+  if (arg_parser.get<int>(LBANN_OPTION_HYDROGEN_BLOCK_SIZE) != -1) {
+    trainer->set_hydrogen_block_size(arg_parser.get<int>(LBANN_OPTION_HYDROGEN_BLOCK_SIZE));
   }
-  if (arg_parser.get<int>(NUM_PARALLEL_READERS) != -1) {
+  if (arg_parser.get<int>(LBANN_OPTION_NUM_PARALLEL_READERS) != -1) {
     trainer->set_num_parallel_readers(
-      arg_parser.get<int>(NUM_PARALLEL_READERS));
+      arg_parser.get<int>(LBANN_OPTION_NUM_PARALLEL_READERS));
   }
-  if (arg_parser.get<bool>(DISABLE_CUDA)) {
-    model->set_disable_cuda(arg_parser.get<bool>(DISABLE_CUDA));
+  if (arg_parser.get<bool>(LBANN_OPTION_DISABLE_CUDA)) {
+    model->set_disable_cuda(arg_parser.get<bool>(LBANN_OPTION_DISABLE_CUDA));
   }
-  if (arg_parser.get<int>(RANDOM_SEED) == -1) {
-    trainer->set_random_seed(arg_parser.get<int>(RANDOM_SEED));
+  if (arg_parser.get<int>(LBANN_OPTION_RANDOM_SEED) == -1) {
+    trainer->set_random_seed(arg_parser.get<int>(LBANN_OPTION_RANDOM_SEED));
   }
-  if (arg_parser.get<bool>(SERIALIZE_IO)) {
-    trainer->set_serialize_io(arg_parser.get<bool>(SERIALIZE_IO));
+  if (arg_parser.get<bool>(LBANN_OPTION_SERIALIZE_IO)) {
+    trainer->set_serialize_io(arg_parser.get<bool>(LBANN_OPTION_SERIALIZE_IO));
   }
 }
 
@@ -938,7 +938,7 @@ void print_parameters(const lbann_comm& comm,
   for(size_t i = 0; i < random_seeds.size(); i++) {
     int trainer_rank = comm.map_world_rank_to_trainer_rank(i);
     int rank_in_trainer = comm.map_world_rank_to_rank_in_trainer(i);
-    if(rank_in_trainer < arg_parser.get<int>(MAX_RNG_SEEDS_DISPLAY)) {
+    if(rank_in_trainer < arg_parser.get<int>(LBANN_OPTION_MAX_RNG_SEEDS_DISPLAY)) {
       std::stringstream id;
       id << "[" << trainer_rank << "][" << rank_in_trainer << "]";
       root_rng << id.str() << "=" << std::setfill('0') << std::setw(10) << static_cast<unsigned int>(root_random_seeds[i]) << " " ;
@@ -982,7 +982,7 @@ void save_session(const lbann_comm& comm, const int argc, char * const* argv, lb
 
   //do not write output file for a repeated experiment;
   //may want to revisit this decision later ...
-  if (arg_parser.get<std::string>(PROTOTEXT) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_PROTOTEXT) != "") {
     return;
   }
 
diff --git a/src/utils/lbann_library.cpp b/src/utils/lbann_library.cpp
index 3854e29cf28..429397aae85 100644
--- a/src/utils/lbann_library.cpp
+++ b/src/utils/lbann_library.cpp
@@ -84,10 +84,10 @@ load_inference_model(lbann_comm* lc,
 /// Return the
 int allocate_trainer_resources(lbann_comm *comm) {
   auto& arg_parser = global_argument_parser();
-  int procs_per_trainer = arg_parser.get<int>(PROCS_PER_TRAINER);
-  int trainer_grid_height = arg_parser.get<int>(TRAINER_GRID_HEIGHT);
-  int trainer_primary_grid_size = arg_parser.get<int>(TRAINER_PRIMARY_GRID_SIZE);
-  bool trainer_create_two_models = arg_parser.get<bool>(TRAINER_CREATE_TWO_MODELS);
+  int procs_per_trainer = arg_parser.get<int>(LBANN_OPTION_PROCS_PER_TRAINER);
+  int trainer_grid_height = arg_parser.get<int>(LBANN_OPTION_TRAINER_GRID_HEIGHT);
+  int trainer_primary_grid_size = arg_parser.get<int>(LBANN_OPTION_TRAINER_PRIMARY_GRID_SIZE);
+  bool trainer_create_two_models = arg_parser.get<bool>(LBANN_OPTION_TRAINER_CREATE_TWO_MODELS);
 
   if (procs_per_trainer == 0) {
     procs_per_trainer = comm->get_procs_in_world();
@@ -183,12 +183,12 @@ trainer& construct_trainer(lbann_comm* comm,
 
   // If the checkpoint directory has been overridden reset it before
   // setting up the trainer
-  if (arg_parser.get<std::string>(CKPT_DIR) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_CKPT_DIR) != "") {
     for (auto&& c : global_trainer_->get_callbacks()) {
       {
         auto* cb = dynamic_cast<callback::checkpoint*>(c);
         if(cb != nullptr) {
-          cb->set_checkpoint_dir(arg_parser.get<std::string>(CKPT_DIR));
+          cb->set_checkpoint_dir(arg_parser.get<std::string>(LBANN_OPTION_CKPT_DIR));
           if(comm->am_trainer_master()) {
             std::cout << "Setting the checkpoint directory to " << cb->get_checkpoint_dir() << std::endl;
           }
@@ -196,12 +196,12 @@ trainer& construct_trainer(lbann_comm* comm,
       }
     }
   }
-  if (arg_parser.get<std::string>(RESTART_DIR) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_RESTART_DIR) != "") {
     for (auto&& c : global_trainer_->get_callbacks()) {
       {
         auto* cb = dynamic_cast<callback::checkpoint*>(c);
         if(cb != nullptr) {
-          cb->set_restart_dir(arg_parser.get<std::string>(RESTART_DIR));
+          cb->set_restart_dir(arg_parser.get<std::string>(LBANN_OPTION_RESTART_DIR));
           if(comm->am_trainer_master()) {
             std::cout << "Setting the restart directory to " << cb->get_restart_dir() << std::endl;
           }
@@ -268,7 +268,7 @@ trainer& construct_trainer(lbann_comm* comm,
 
   global_trainer_->setup(std::move(io_thread_pool), data_readers);
 
-  if (arg_parser.get<bool>(DISABLE_BACKGROUND_IO_ACTIVITY)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_DISABLE_BACKGROUND_IO_ACTIVITY)) {
     global_trainer_->allow_background_io_activity(false);
   }
 
@@ -299,7 +299,7 @@ std::unique_ptr<thread_pool> construct_io_thread_pool(lbann_comm* comm,
   }
 
   auto& arg_parser = global_argument_parser();
-  int req_io_threads = arg_parser.get<int>(NUM_IO_THREADS);
+  int req_io_threads = arg_parser.get<int>(LBANN_OPTION_NUM_IO_THREADS);
   int num_io_threads = std::max(std::min(max_io_threads, req_io_threads), 1);
 
   auto io_threads_offset = free_core_offset(comm);
@@ -338,7 +338,7 @@ std::unique_ptr<model> build_model_from_prototext(
 
   // Display how the OpenMP threads are provisioned
   auto& arg_parser = global_argument_parser();
-  if (arg_parser.get<bool>(PRINT_AFFINITY)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_PRINT_AFFINITY)) {
     display_omp_setup();
   }
 
@@ -356,12 +356,12 @@ std::unique_ptr<model> build_model_from_prototext(
 
   // If the checkpoint directory has been overridden reset it before
   // setting up the model
-  if (arg_parser.get<std::string>(CKPT_DIR) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_CKPT_DIR) != "") {
     for (auto&& c : ret_model->get_callbacks()) {
       {
         auto* cb = dynamic_cast<callback::dump_weights*>(c);
         if(cb != nullptr) {
-          cb->set_target_dir(arg_parser.get<std::string>(CKPT_DIR));
+          cb->set_target_dir(arg_parser.get<std::string>(LBANN_OPTION_CKPT_DIR));
           if(comm->am_trainer_master()) {
             std::cout << "Setting the dump weights directory to " << cb->get_target_dir() << std::endl;
           }
@@ -370,7 +370,7 @@ std::unique_ptr<model> build_model_from_prototext(
       {
         auto* cb = dynamic_cast<callback::save_model*>(c);
         if(cb != nullptr) {
-          cb->set_target_dir(arg_parser.get<std::string>(CKPT_DIR));
+          cb->set_target_dir(arg_parser.get<std::string>(LBANN_OPTION_CKPT_DIR));
           if(comm->am_trainer_master()) {
             std::cout << "Setting the dump weights directory to " << cb->get_target_dir() << std::endl;
           }
@@ -379,7 +379,7 @@ std::unique_ptr<model> build_model_from_prototext(
     }
   }
 
-  if (arg_parser.get<std::string>(LOAD_MODEL_WEIGHTS_DIR) != "") {
+  if (arg_parser.get<std::string>(LBANN_OPTION_LOAD_MODEL_WEIGHTS_DIR) != "") {
     callback::load_model* cb = nullptr;
     for (auto&& c : ret_model->get_callbacks()) {
       cb = dynamic_cast<callback::load_model*>(c);
@@ -390,8 +390,8 @@ std::unique_ptr<model> build_model_from_prototext(
 
     std::string active_load_model_dir;
     std::string load_model_dir =
-      arg_parser.get<std::string>(LOAD_MODEL_WEIGHTS_DIR);
-    if (arg_parser.get<bool>(LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE)) {
+      arg_parser.get<std::string>(LBANN_OPTION_LOAD_MODEL_WEIGHTS_DIR);
+    if (arg_parser.get<bool>(LBANN_OPTION_LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE)) {
       active_load_model_dir = load_model_dir;
     }
     else {
@@ -422,7 +422,7 @@ std::unique_ptr<model> build_model_from_prototext(
       }
 #endif
     }else {
-      cb->add_dir(arg_parser.get<std::string>(LOAD_MODEL_WEIGHTS_DIR));
+      cb->add_dir(arg_parser.get<std::string>(LBANN_OPTION_LOAD_MODEL_WEIGHTS_DIR));
     }
   }
 
diff --git a/src/utils/options.cpp b/src/utils/options.cpp
index 6e2c7e89d3f..eaa91dd9f42 100644
--- a/src/utils/options.cpp
+++ b/src/utils/options.cpp
@@ -35,68 +35,68 @@ void construct_std_options()
 
   // Bool flags
   arg_parser.add_flag(
-    DISABLE_BACKGROUND_IO_ACTIVITY,
+    LBANN_OPTION_DISABLE_BACKGROUND_IO_ACTIVITY,
     {"--disable_background_io_activity"},
     "[STD] prevent the input layers from fetching data in the background");
   arg_parser.add_flag(
-    DISABLE_CUDA,
+    LBANN_OPTION_DISABLE_CUDA,
     {"--disable_cuda"},
     "[STD] has no effect unless LBANN was compiled with LBANN_HAS_CUDNN");
   arg_parser.add_flag(
-    LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE,
+    LBANN_OPTION_LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE,
     {"--load_model_weights_dir_is_complete"},
     "[STD] Use load_model_weights_dir as given, ignoring checkpoint hierarchy");
-  arg_parser.add_flag(LTFB_ALLOW_GLOBAL_STATISTICS,
+  arg_parser.add_flag(LBANN_OPTION_LTFB_ALLOW_GLOBAL_STATISTICS,
                       {"--ltfb_allow_global_statistics"},
                       utils::ENV("LBANN_LTFB_ALLOW_GLOBAL_STATISTICS"),
                       "[STD] Allow the print_statistics callback to report "
                       "global (inter-trainer) summary statistics.");
   arg_parser.add_flag(
-    LTFB_VERBOSE,
+    LBANN_OPTION_LTFB_VERBOSE,
     {"--ltfb_verbose"},
     "[STD] Increases number of per-trainer messages that are reported");
   arg_parser.add_flag(
-    NO_IM_COMM,
+    LBANN_OPTION_NO_IM_COMM,
     {"--no_im_comm"},
     "[STD] removed ImComm callback, if present; this is intended for"
     "running alexnet with a single model, but may be useful elsewhere");
-  arg_parser.add_flag(PRELOAD_DATA_STORE,
+  arg_parser.add_flag(LBANN_OPTION_PRELOAD_DATA_STORE,
                       {"--preload_data_store"},
                       "[STD] Preloads the data store in-memory structure "
                       "druing data reader load time");
   arg_parser.add_flag(
-    PRINT_AFFINITY,
+    LBANN_OPTION_PRINT_AFFINITY,
     {"--print_affinity"},
     "[STD] display information on how OpenMP threads are provisioned");
   arg_parser.add_flag(
-    SERIALIZE_IO,
+    LBANN_OPTION_SERIALIZE_IO,
     {"--serialize_io"},
     "[STD] force data readers to use a single threaded for I/O");
-  arg_parser.add_flag(ST_FULL_TRACE, {"--st_full_trace"}, "[STD] TODO");
-  arg_parser.add_flag(ST_ON, {"--st_on"}, "[STD] TODO");
-  arg_parser.add_flag(USE_CUBLAS_TENSOR_OPS,
+  arg_parser.add_flag(LBANN_OPTION_ST_FULL_TRACE, {"--st_full_trace"}, "[STD] TODO");
+  arg_parser.add_flag(LBANN_OPTION_ST_ON, {"--st_on"}, "[STD] TODO");
+  arg_parser.add_flag(LBANN_OPTION_USE_CUBLAS_TENSOR_OPS,
                       {"--use-cublas-tensor-ops"},
                       utils::ENV("LBANN_USE_CUBLAS_TENSOR_OPS"),
                       "[STD] Set the default cuBLAS math mode to use "
                       "Tensor Core operations when available.");
-  arg_parser.add_flag(USE_CUDNN_TENSOR_OPS,
+  arg_parser.add_flag(LBANN_OPTION_USE_CUDNN_TENSOR_OPS,
                       {"--use-cudnn-tensor-ops"},
                       utils::ENV("LBANN_USE_CUDNN_TENSOR_OPS"),
                       "[STD] Set the default cuDNN math mode to use "
                       "Tensor Core operations when available.");
-  arg_parser.add_flag(USE_DATA_STORE,
+  arg_parser.add_flag(LBANN_OPTION_USE_DATA_STORE,
                       {"--use_data_store"},
                       "[STD] Enables the data store in-memory structure");
-  arg_parser.add_flag(USE_LTFB, {"--ltfb"}, "[STD] TODO");
-  arg_parser.add_flag(VERBOSE,
+  arg_parser.add_flag(LBANN_OPTION_USE_LTFB, {"--ltfb"}, "[STD] TODO");
+  arg_parser.add_flag(LBANN_OPTION_VERBOSE,
                       {"--verbose", "--verbose_print"},
                       "[STD] Turns on verbose mode");
-  arg_parser.add_flag(WRITE_SAMPLE_LIST,
+  arg_parser.add_flag(LBANN_OPTION_WRITE_SAMPLE_LIST,
                       {"--write_sample_list"},
                       "[STD] Writes out the sample list that was loaded into "
                       "the current directory");
   arg_parser.add_flag(
-    USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP,
+    LBANN_OPTION_USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP,
     {"--use_gpu_default_memory_in_forward_prop"},
     utils::ENV("LBANN_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP"),
     "[STD] Use Hydrogen's default memory mode for GPU buffers in "
@@ -116,18 +116,18 @@ void construct_std_options()
 
   // Input options
   arg_parser.add_option(
-    CKPT_DIR,
+    LBANN_OPTION_CKPT_DIR,
     {"--checkpoint_dir", "--ckpt_dir"},
     "[STD] Save to or restart from a specific checkpoint directory.\n"
     "Additionally, sets the output directory for dumping weights.\n"
     "Modifies callbacks: checkpoint, save_model, dump_weights\n",
     "");
-  arg_parser.add_option(HYDROGEN_BLOCK_SIZE,
+  arg_parser.add_option(LBANN_OPTION_HYDROGEN_BLOCK_SIZE,
                         {"--hydrogen_block_size"},
                         "[STD] Block size for Hydrogen",
                         0);
   arg_parser.add_option(
-    LOAD_MODEL_WEIGHTS_DIR,
+    LBANN_OPTION_LOAD_MODEL_WEIGHTS_DIR,
     {"--load_model_weights_dir"},
     "[STD] Load model wieghts found in the given directory.\n"
     "If the directory doesn't exist, doesn't contain valid weights,\n"
@@ -135,49 +135,49 @@ void construct_std_options()
     "an error will be thrown.\n",
     "");
   arg_parser.add_option(
-    MAX_RNG_SEEDS_DISPLAY,
+    LBANN_OPTION_MAX_RNG_SEEDS_DISPLAY,
     {"--rng_seeds_per_trainer_to_display"},
     utils::ENV("LBANN_RNG_SEEDS_PER_TRAINER_TO_DISPLAY"),
     "[STD] Limit how many random seeds LBANN should display "
     "from each trainer",
     2);
-  arg_parser.add_option(METADATA, {"--metadata"}, "[STD] TODO", "");
-  arg_parser.add_option(MINI_BATCH_SIZE,
+  arg_parser.add_option(LBANN_OPTION_METADATA, {"--metadata"}, "[STD] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_MINI_BATCH_SIZE,
                         {"--mini_batch_size"},
                         "[STD] Size of mini batches",
                         -1);
-  arg_parser.add_option(MODEL, {"--model"}, "[STD] TODO", "");
-  arg_parser.add_option(NUM_EPOCHS,
+  arg_parser.add_option(LBANN_OPTION_MODEL, {"--model"}, "[STD] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_NUM_EPOCHS,
                         {"--num_epochs"},
                         "[STD] Number of epochs to train model",
                         -1);
-  arg_parser.add_option(NUM_IO_THREADS,
+  arg_parser.add_option(LBANN_OPTION_NUM_IO_THREADS,
                         {"--num_io_threads"},
                         utils::ENV("LBANN_NUM_IO_THREADS"),
                         "[STD] Number of threads available to both I/O and "
                         "initial data transformations for each rank.",
                         64);
-  arg_parser.add_option(NUM_PARALLEL_READERS,
+  arg_parser.add_option(LBANN_OPTION_NUM_PARALLEL_READERS,
                         {"--num_parallel_readers"},
                         "[STD] The number of parallel data readers",
                         1);
-  arg_parser.add_option(NUM_TEST_SAMPLES,
+  arg_parser.add_option(LBANN_OPTION_NUM_TEST_SAMPLES,
                         {"--num_test_samples"},
                         utils::ENV("LBANN_NUM_TEST_SAMPLES"),
                         "[STD] Set the number of testing samples to ingest.",
                         -1);
-  arg_parser.add_option(NUM_TRAIN_SAMPLES,
+  arg_parser.add_option(LBANN_OPTION_NUM_TRAIN_SAMPLES,
                         {"--num_train_samples"},
                         utils::ENV("LBANN_NUM_TRAIN_SAMPLES"),
                         "[STD] Set the number of training samples to ingest.",
                         -1);
-  arg_parser.add_option(NUM_VALIDATE_SAMPLES,
+  arg_parser.add_option(LBANN_OPTION_NUM_VALIDATE_SAMPLES,
                         {"--num_validate_samples"},
                         utils::ENV("LBANN_NUM_VALIDATE_SAMPLES"),
                         "[STD] Set the number of validate samples to ingest.",
                         -1);
-  arg_parser.add_option(OPTIMIZER, {"--optimizer"}, "[STD] TODO", "");
-  arg_parser.add_option(PROCS_PER_TRAINER,
+  arg_parser.add_option(LBANN_OPTION_OPTIMIZER, {"--optimizer"}, "[STD] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_PROCS_PER_TRAINER,
                         {"--procs_per_trainer"},
                         utils::ENV("LBANN_PROCS_PER_TRAINER"),
                         "[STD] Number of MPI ranks per LBANN trainer, "
@@ -188,36 +188,36 @@ void construct_std_options()
                         " The number of resulting trainers is "
                         " num_procs / procs_per_trainer.",
                         -1);
-  arg_parser.add_option(PROTOTEXT,
+  arg_parser.add_option(LBANN_OPTION_PROTOTEXT,
                         {"--prototext"},
                         "[STD] Prototext file containing experiment",
                         "");
-  arg_parser.add_option(RANDOM_SEED,
+  arg_parser.add_option(LBANN_OPTION_RANDOM_SEED,
                         {"--random_seed", "--rand_seed"},
                         "[STD] Value to seed RNG",
                         -1);
-  arg_parser.add_option(READER, {"--reader"}, "[STD] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_READER, {"--reader"}, "[STD] TODO", "");
   arg_parser.add_option(
-    RESTART_DIR,
+    LBANN_OPTION_RESTART_DIR,
     {"--restart_dir"},
     "[STD] Restart from a checkpoint found in the given directory.\n"
     "If the directory doesn't exist or doesn't contain a checkpoint,\n"
     "an error will be thrown.\n",
     "");
   arg_parser.add_option(
-    TRAINER_CREATE_TWO_MODELS,
+    LBANN_OPTION_TRAINER_CREATE_TWO_MODELS,
     {"--trainer_create_two_models"},
     utils::ENV("LBANN_TRAINER_CREATE_TWO_MODELS"),
     "[STD] Create two models (one each for primary and secondary grid). "
     "Default is False.",
     false);
-  arg_parser.add_option(TRAINER_GRID_HEIGHT,
+  arg_parser.add_option(LBANN_OPTION_TRAINER_GRID_HEIGHT,
                         {"--trainer_grid_height"},
                         utils::ENV("LBANN_TRAINER_GRID_HEIGHT"),
                         "[STD] Height of 2D process grid for each trainer. "
                         "Default grid is approximately square.",
                         -1);
-  arg_parser.add_option(TRAINER_PRIMARY_GRID_SIZE,
+  arg_parser.add_option(LBANN_OPTION_TRAINER_PRIMARY_GRID_SIZE,
                         {"--trainer_primary_grid_size"},
                         utils::ENV("LBANN_TRAINER_PRIMARY_GRID_SIZE"),
                         "[STD] Primary grid size per trainer. "
@@ -230,34 +230,34 @@ void construct_datastore_options()
   auto& arg_parser = global_argument_parser();
 
   // Bool flags
-  arg_parser.add_flag(DATA_STORE_CACHE,
+  arg_parser.add_flag(LBANN_OPTION_DATA_STORE_CACHE,
                       {"--data_store_cache"},
                       "[DATASTORE] TODO");
-  arg_parser.add_flag(DATA_STORE_DEBUG,
+  arg_parser.add_flag(LBANN_OPTION_DATA_STORE_DEBUG,
                       {"--data_store_debug"},
                       "[DATASTORE] TODO");
-  arg_parser.add_flag(DATA_STORE_FAIL,
+  arg_parser.add_flag(LBANN_OPTION_DATA_STORE_FAIL,
                       {"--data_store_fail"},
                       "[DATASTORE] TODO");
-  arg_parser.add_flag(DATA_STORE_MIN_MAX_TIMING,
+  arg_parser.add_flag(LBANN_OPTION_DATA_STORE_MIN_MAX_TIMING,
                       {"--data_store_min_max_timing"},
                       "[DATASTORE] TODO");
-  arg_parser.add_flag(DATA_STORE_NO_THREAD,
+  arg_parser.add_flag(LBANN_OPTION_DATA_STORE_NO_THREAD,
                       {"--data_store_no_thread"},
                       "[DATASTORE] TODO");
-  arg_parser.add_flag(DATA_STORE_PROFILE,
+  arg_parser.add_flag(LBANN_OPTION_DATA_STORE_PROFILE,
                       {"--data_store_profile"},
                       "[DATASTORE] TODO");
-  arg_parser.add_flag(DATA_STORE_TEST_CACHE,
+  arg_parser.add_flag(LBANN_OPTION_DATA_STORE_TEST_CACHE,
                       {"--data_store_test_cache"},
                       "[DATASTORE] TODO");
 
   // Input options
-  arg_parser.add_option(DATA_STORE_SPILL,
+  arg_parser.add_option(LBANN_OPTION_DATA_STORE_SPILL,
                         {"--data_store_spill"},
                         "[DATASTORE] TODO",
                         "");
-  arg_parser.add_option(DATA_STORE_TEST_CHECKPOINT,
+  arg_parser.add_option(LBANN_OPTION_DATA_STORE_TEST_CHECKPOINT,
                         {"--data_store_test_checkpoint"},
                         "[DATASTORE] TODO",
                         "");
@@ -268,146 +268,146 @@ void construct_datareader_options()
   auto& arg_parser = global_argument_parser();
 
   // Bool flags
-  arg_parser.add_flag(ALL_GATHER_OLD,
+  arg_parser.add_flag(LBANN_OPTION_ALL_GATHER_OLD,
                       {"--all_gather_old"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(CHECK_DATA, {"--check_data"}, "[DATAREADER] TODO");
-  arg_parser.add_flag(CREATE_TARBALL,
+  arg_parser.add_flag(LBANN_OPTION_CHECK_DATA, {"--check_data"}, "[DATAREADER] TODO");
+  arg_parser.add_flag(LBANN_OPTION_CREATE_TARBALL,
                       {"--create_tarball"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(DEBUG_CONCATENATE,
+  arg_parser.add_flag(LBANN_OPTION_DEBUG_CONCATENATE,
                       {"--debug_concatenate"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(DISABLE_SIGNAL_HANDLER,
+  arg_parser.add_flag(LBANN_OPTION_DISABLE_SIGNAL_HANDLER,
                       {"--disable_signal_handler"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(EXIT_AFTER_SETUP,
+  arg_parser.add_flag(LBANN_OPTION_EXIT_AFTER_SETUP,
                       {"--exit_after_setup"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(GENERATE_MULTI_PROTO,
+  arg_parser.add_flag(LBANN_OPTION_GENERATE_MULTI_PROTO,
                       {"--generate_multi_proto"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(KEEP_SAMPLE_ORDER,
+  arg_parser.add_flag(LBANN_OPTION_KEEP_SAMPLE_ORDER,
                       {"--keep_sample_order"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(KEEP_PACKED_FIELDS,
+  arg_parser.add_flag(LBANN_OPTION_KEEP_PACKED_FIELDS,
                       {"--keep_packed_fields"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(LOAD_FULL_SAMPLE_LIST_ONCE,
+  arg_parser.add_flag(LBANN_OPTION_LOAD_FULL_SAMPLE_LIST_ONCE,
                       {"--load_full_sample_list_once"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(MAKE_TEST_FAIL,
+  arg_parser.add_flag(LBANN_OPTION_MAKE_TEST_FAIL,
                       {"--make_test_fail"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(NODE_SIZES_VARY,
+  arg_parser.add_flag(LBANN_OPTION_NODE_SIZES_VARY,
                       {"--node_sizes_vary"},
                       "[DATAREADER] TODO");
   arg_parser.add_flag(LBANN_OPTION_QUIET, {"--quiet"}, "[DATAREADER] TODO");
-  arg_parser.add_flag(STACK_TRACE_TO_FILE,
+  arg_parser.add_flag(LBANN_OPTION_STACK_TRACE_TO_FILE,
                       {"--stack_trace_to_file"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(TEST_ENCODE, {"--test_encode"}, "[DATAREADER] TODO");
-  arg_parser.add_flag(WRITE_SAMPLE_LABEL_LIST,
+  arg_parser.add_flag(LBANN_OPTION_TEST_ENCODE, {"--test_encode"}, "[DATAREADER] TODO");
+  arg_parser.add_flag(LBANN_OPTION_WRITE_SAMPLE_LABEL_LIST,
                       {"--write_sample_label_list"},
                       "[DATAREADER] TODO");
-  arg_parser.add_flag(Z_SCORE, {"--z_score"}, "[DATAREADER] TODO");
+  arg_parser.add_flag(LBANN_OPTION_Z_SCORE, {"--z_score"}, "[DATAREADER] TODO");
 
   // Input options
-  arg_parser.add_option(ABSOLUTE_SAMPLE_COUNT,
+  arg_parser.add_option(LBANN_OPTION_ABSOLUTE_SAMPLE_COUNT,
                         {"--absolute_sample_count"},
                         "[DATAREADER] TODO",
                         -1);
   arg_parser.add_option(
-    DATA_FILEDIR,
+    LBANN_OPTION_DATA_FILEDIR,
     {"--data_filedir"},
     "[DATAREADER] Sets the file direcotry for train and test data",
     "");
-  arg_parser.add_option(DATA_FILEDIR_TEST,
+  arg_parser.add_option(LBANN_OPTION_DATA_FILEDIR_TEST,
                         {"--data_filedir_test"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(DATA_FILEDIR_TRAIN,
+  arg_parser.add_option(LBANN_OPTION_DATA_FILEDIR_TRAIN,
                         {"--data_filedir_train"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(DATA_FILEDIR_VALIDATE,
+  arg_parser.add_option(LBANN_OPTION_DATA_FILEDIR_VALIDATE,
                         {"--data_filedir_validate"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(DATA_FILENAME_TEST,
+  arg_parser.add_option(LBANN_OPTION_DATA_FILENAME_TEST,
                         {"--data_filename_test"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(DATA_FILENAME_TRAIN,
+  arg_parser.add_option(LBANN_OPTION_DATA_FILENAME_TRAIN,
                         {"--data_filename_train"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(DATA_FILENAME_VALIDATE,
+  arg_parser.add_option(LBANN_OPTION_DATA_FILENAME_VALIDATE,
                         {"--data_filename_validate"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(DATA_READER_PERCENT,
+  arg_parser.add_option(LBANN_OPTION_DATA_READER_PERCENT,
                         {"--data_reader_percent"},
                         "[DATAREADER] TODO",
                         (float)-1);
-  arg_parser.add_option(DELIMITER, {"--delimiter"}, "[DATAREADER] TODO", "");
-  arg_parser.add_option(IMAGE_SIZES_FILENAME,
+  arg_parser.add_option(LBANN_OPTION_DELIMITER, {"--delimiter"}, "[DATAREADER] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_IMAGE_SIZES_FILENAME,
                         {"--image_sizes_filename"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(LABEL_FILENAME_TEST,
+  arg_parser.add_option(LBANN_OPTION_LABEL_FILENAME_TEST,
                         {"--label_filename_test"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(LABEL_FILENAME_TRAIN,
+  arg_parser.add_option(LBANN_OPTION_LABEL_FILENAME_TRAIN,
                         {"--label_filename_train"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(LABEL_FILENAME_VALIDATE,
+  arg_parser.add_option(LBANN_OPTION_LABEL_FILENAME_VALIDATE,
                         {"--label_filename_validate"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(NORMALIZATION,
+  arg_parser.add_option(LBANN_OPTION_NORMALIZATION,
                         {"--normalization"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(N_LINES, {"--n_lines"}, "[DATAREADER] TODO", -1);
-  arg_parser.add_option(PAD_INDEX, {"--pad_index"}, "[DATAREADER] TODO", -1);
-  arg_parser.add_option(PILOT2_READ_FILE_SIZES,
+  arg_parser.add_option(LBANN_OPTION_N_LINES, {"--n_lines"}, "[DATAREADER] TODO", -1);
+  arg_parser.add_option(LBANN_OPTION_PAD_INDEX, {"--pad_index"}, "[DATAREADER] TODO", -1);
+  arg_parser.add_option(LBANN_OPTION_PILOT2_READ_FILE_SIZES,
                         {"--pilot2_read_file_sizes"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(PILOT2_SAVE_FILE_SIZES,
+  arg_parser.add_option(LBANN_OPTION_PILOT2_SAVE_FILE_SIZES,
                         {"--pilot2_save_file_sizes"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(SAMPLE_LIST_TEST,
+  arg_parser.add_option(LBANN_OPTION_SAMPLE_LIST_TEST,
                         {"--sample_list_test"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(SAMPLE_LIST_TRAIN,
+  arg_parser.add_option(LBANN_OPTION_SAMPLE_LIST_TRAIN,
                         {"--sample_list_train"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(SAMPLE_LIST_VALIDATE,
+  arg_parser.add_option(LBANN_OPTION_SAMPLE_LIST_VALIDATE,
                         {"--sample_list_validate"},
                         "[DATAREADER] TODO",
                         "");
-  arg_parser.add_option(SEQUENCE_LENGTH,
+  arg_parser.add_option(LBANN_OPTION_SEQUENCE_LENGTH,
                         {"--sequence_length", "--seq_len"},
                         "[DATAREADER] TODO",
                         -1);
-  arg_parser.add_option(SMILES_BUFFER_SIZE,
+  arg_parser.add_option(LBANN_OPTION_SMILES_BUFFER_SIZE,
                         {"--smiles_buffer_size"},
                         utils::ENV("LBANN_SMILES_BUFFER_SIZE"),
                         "[DATAREADER] Size of the read buffer for the SMILES "
                         "data reader.",
                         16 * 1024 * 1024UL);
-  arg_parser.add_option(TEST_TARBALL,
+  arg_parser.add_option(LBANN_OPTION_TEST_TARBALL,
                         {"--test_tarball"},
                         "[DATAREADER] TODO",
                         -1);
-  arg_parser.add_option(VOCAB, {"--vocab"}, "[DATAREADER] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_VOCAB, {"--vocab"}, "[DATAREADER] TODO", "");
 }
 
 void construct_jag_options()
@@ -415,35 +415,35 @@ void construct_jag_options()
   auto& arg_parser = global_argument_parser();
 
   // Bool flags
-  arg_parser.add_flag(JAG, {"--jag"}, "[JAG] TODO");
-  arg_parser.add_flag(JAG_PARTITIONED, {"--jag_partitioned"}, "[JAG] TODO");
+  arg_parser.add_flag(LBANN_OPTION_JAG, {"--jag"}, "[JAG] TODO");
+  arg_parser.add_flag(LBANN_OPTION_JAG_PARTITIONED, {"--jag_partitioned"}, "[JAG] TODO");
 
   // Input options
-  arg_parser.add_option(BASE_DIR, {"--base_dir"}, "[JAG] TODO", "");
-  arg_parser.add_option(FILELIST, {"--filelist"}, "[JAG] TODO", "");
-  arg_parser.add_option(FILENAME, {"--filename"}, "[JAG] TODO", "");
-  arg_parser.add_option(FORMAT, {"--format"}, "[JAG] TODO", "");
-  arg_parser.add_option(INDEX_FN, {"--index_fn"}, "[JAG] TODO", "");
-  arg_parser.add_option(MAPPING_FN, {"--mapping_fn"}, "[JAG] TODO", "");
-  arg_parser.add_option(NUM_LISTS, {"--num_lists"}, "[JAG] TODO", -1);
-  arg_parser.add_option(NUM_SAMPLES, {"--num_samples"}, "[JAG] TODO", -1);
-  arg_parser.add_option(NUM_SAMPLES_PER_FILE,
+  arg_parser.add_option(LBANN_OPTION_BASE_DIR, {"--base_dir"}, "[JAG] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_FILELIST, {"--filelist"}, "[JAG] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_FILENAME, {"--filename"}, "[JAG] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_FORMAT, {"--format"}, "[JAG] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_INDEX_FN, {"--index_fn"}, "[JAG] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_MAPPING_FN, {"--mapping_fn"}, "[JAG] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_NUM_LISTS, {"--num_lists"}, "[JAG] TODO", -1);
+  arg_parser.add_option(LBANN_OPTION_NUM_SAMPLES, {"--num_samples"}, "[JAG] TODO", -1);
+  arg_parser.add_option(LBANN_OPTION_NUM_SAMPLES_PER_FILE,
                         {"--num_samples_per_file"},
                         "[JAG] TODO",
-                        -1);
-  arg_parser.add_option(NUM_SAMPLES_PER_LIST,
+                        1000);
+  arg_parser.add_option(LBANN_OPTION_NUM_SAMPLES_PER_LIST,
                         {"--num_samples_per_list"},
                         "[JAG] TODO",
                         -1);
-  arg_parser.add_option(NUM_SUBDIRS, {"--num_subdirs"}, "[JAG] TODO", -1);
-  arg_parser.add_option(OUTPUT_BASE_DIR,
+  arg_parser.add_option(LBANN_OPTION_NUM_SUBDIRS, {"--num_subdirs"}, "[JAG] TODO", -1);
+  arg_parser.add_option(LBANN_OPTION_OUTPUT_BASE_DIR,
                         {"--output_base_dir"},
                         "[JAG] TODO",
                         "");
-  arg_parser.add_option(OUTPUT_BASE_FN, {"--output_base_fn"}, "[JAG] TODO", "");
-  arg_parser.add_option(OUTPUT_DIR, {"--output_dir"}, "[JAG] TODO", "");
-  arg_parser.add_option(OUTPUT_FN, {"--output_fn"}, "[JAG] TODO", "");
-  arg_parser.add_option(SAMPLES_PER_FILE,
+  arg_parser.add_option(LBANN_OPTION_OUTPUT_BASE_FN, {"--output_base_fn"}, "[JAG] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_OUTPUT_DIR, {"--output_dir"}, "[JAG] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_OUTPUT_FN, {"--output_fn"}, "[JAG] TODO", "");
+  arg_parser.add_option(LBANN_OPTION_SAMPLES_PER_FILE,
                         {"--samples_per_file"},
                         "[JAG] TODO",
                         -1);
diff --git a/src/utils/protobuf_utils.cpp b/src/utils/protobuf_utils.cpp
index fe45761562a..66a79f2c841 100644
--- a/src/utils/protobuf_utils.cpp
+++ b/src/utils/protobuf_utils.cpp
@@ -49,7 +49,7 @@ parse_prototext_filenames_from_command_line(
   std::vector<std::string> data_set_metadata;
   bool single_file_load = false;
 
-  std::string params[] = {PROTOTEXT, MODEL, READER, METADATA, OPTIMIZER};
+  std::string params[] = {LBANN_OPTION_PROTOTEXT, LBANN_OPTION_MODEL, LBANN_OPTION_READER, LBANN_OPTION_METADATA, LBANN_OPTION_OPTIMIZER};
   for (auto& which : params) {
     std::string fn = arg_parser.get<std::string>(which);
     if (fn != "") {
@@ -60,20 +60,20 @@ parse_prototext_filenames_from_command_line(
           fn.substr(0, t_pos + 7) + std::to_string(trainer_rank);
         fn = fname;
       }
-      if (which == PROTOTEXT) {
+      if (which == LBANN_OPTION_PROTOTEXT) {
         models.push_back(fn);
         single_file_load = true;
       }
-      if (which == MODEL) {
+      if (which == LBANN_OPTION_MODEL) {
         models.push_back(fn);
       }
-      if (which == READER) {
+      if (which == LBANN_OPTION_READER) {
         readers.push_back(fn);
       }
-      if (which == METADATA) {
+      if (which == LBANN_OPTION_METADATA) {
         data_set_metadata.push_back(fn);
       }
-      if (which == OPTIMIZER) {
+      if (which == LBANN_OPTION_OPTIMIZER) {
         optimizers.push_back(fn);
       }
     }
diff --git a/src/utils/stack_profiler.cpp b/src/utils/stack_profiler.cpp
index fe1ca19e859..65d01cb5415 100644
--- a/src/utils/stack_profiler.cpp
+++ b/src/utils/stack_profiler.cpp
@@ -84,11 +84,11 @@ void stack_profiler::activate(int thread) {
   c_hash_thread_id = thread;
   auto& arg_parser = global_argument_parser();
 
-  if (arg_parser.get<bool>(ST_ON)) {
+  if (arg_parser.get<bool>(LBANN_OPTION_ST_ON)) {
     std::cerr << "creating hash table!\n";
     c_hash_create(10000);
     c_hash_profiling_is_turned_on = 1;
-    if (arg_parser.get<bool>(ST_FULL_TRACE)) {
+    if (arg_parser.get<bool>(LBANN_OPTION_ST_FULL_TRACE)) {
       m_full_stack_trace = true;
       if (m_thread_id == 0) {
         c_hash_fp_full_stack_trace = fopen("full_stack_trace.bin", "wb");
diff --git a/src/weights/data_type_weights.cpp b/src/weights/data_type_weights.cpp
index 50b3cfc4c45..d82ee6ca074 100644
--- a/src/weights/data_type_weights.cpp
+++ b/src/weights/data_type_weights.cpp
@@ -236,7 +236,7 @@ void data_type_weights<TensorDataType>::do_setup_() {
 #ifdef LBANN_HAS_GPU
   if (matrix_dist.device == El::Device::GPU) {
     const auto& arg_parser = global_argument_parser();
-    if (!arg_parser.get<bool>(USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
+    if (!arg_parser.get<bool>(LBANN_OPTION_USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP)) {
       m_values->Matrix().SetMemoryMode(0); // Directly-allocated memory
     }
   }

From 5166549fff18e720cd501c7cd266b829bec8636b Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Mon, 1 Nov 2021 16:27:57 -0700
Subject: [PATCH 26/37] Restore support for labels and responses in synthetic
 data reader (#1986)

---
 src/data_readers/data_reader_synthetic.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/data_readers/data_reader_synthetic.cpp b/src/data_readers/data_reader_synthetic.cpp
index 38a9671f013..74d69f3b212 100644
--- a/src/data_readers/data_reader_synthetic.cpp
+++ b/src/data_readers/data_reader_synthetic.cpp
@@ -62,6 +62,12 @@ data_reader_synthetic::data_reader_synthetic(int num_samples,
     m_dimensions(dims)
 {
   set_has_labels(true);
+  set_has_data_field(INPUT_DATA_TYPE_SAMPLES, true);
+  set_has_data_field(INPUT_DATA_TYPE_LABELS, true);
+  m_synthetic_data_fields[INPUT_DATA_TYPE_SAMPLES] = dims;
+  m_synthetic_data_fields[INPUT_DATA_TYPE_LABELS] = {num_labels};
+  m_supported_input_types[INPUT_DATA_TYPE_SAMPLES] = true;
+  m_supported_input_types[INPUT_DATA_TYPE_LABELS] = true;
 }
 
 data_reader_synthetic::data_reader_synthetic(int num_samples,
@@ -75,6 +81,12 @@ data_reader_synthetic::data_reader_synthetic(int num_samples,
     m_response_dimensions(response_dims)
 {
   set_has_responses(true);
+  set_has_data_field(INPUT_DATA_TYPE_SAMPLES, true);
+  set_has_data_field(INPUT_DATA_TYPE_RESPONSES, true);
+  m_synthetic_data_fields[INPUT_DATA_TYPE_SAMPLES] = dims;
+  m_synthetic_data_fields[INPUT_DATA_TYPE_RESPONSES] = response_dims;
+  m_supported_input_types[INPUT_DATA_TYPE_SAMPLES] = true;
+  m_supported_input_types[INPUT_DATA_TYPE_RESPONSES] = true;
 }
 
 data_reader_synthetic::data_reader_synthetic(int num_samples,

From fda4fd00ef2421250cb96d858d3d2c9023e81157 Mon Sep 17 00:00:00 2001
From: Tom Benson <30674819+benson31@users.noreply.github.com>
Date: Mon, 1 Nov 2021 19:43:24 -0400
Subject: [PATCH 27/37] Rename some training algorithm symbols (#1983)

* rename training_algorithm -> TrainingAlgorithm

* rename sgd_training_algorithm -> SGDTrainingAlgorithm

* rename sgd_termination_criteria -> SGDTerminationCriteria; sgd_execution_context -> SGDExecutionContext.

Also, {batch,epoch,seconds}_termination_criteria -> {Batch,Epoch,Seconds}TerminationCriteria, resp.

* rename ltfb::TerminationCriteria -> ltfb::LTFBTerminationCriteria so the basename matches the SGD counterparts

* rename kfac::ExecutionContext -> kfac::KFACExecutionContext

* consolidate execution context files in the execution_algorithms directories
---
 include/lbann/CMakeLists.txt                  |  1 -
 include/lbann/callbacks/callback.hpp          |  4 +-
 include/lbann/callbacks/checkpoint.hpp        |  6 +-
 include/lbann/callbacks/dump_outputs.hpp      |  2 +-
 .../data_coordinator/data_coordinator.hpp     | 16 ++--
 .../batch_functional_inference_algorithm.hpp  |  4 +-
 .../execution_context.hpp                     | 42 +++++-----
 .../lbann/execution_algorithms/factory.hpp    | 10 +--
 include/lbann/execution_algorithms/kfac.hpp   | 18 ++---
 .../kfac/execution_context.hpp                | 20 ++---
 .../execution_algorithms/kfac/kfac_block.hpp  | 14 ++--
 .../kfac/kfac_block_bn.hpp                    |  4 +-
 .../kfac/kfac_block_fc_conv.hpp               |  4 +-
 .../kfac/kfac_block_gru.hpp                   | 10 +--
 include/lbann/execution_algorithms/ltfb.hpp   | 22 ++---
 .../ltfb/execution_context.hpp                | 14 ++--
 .../ltfb/meta_learning_strategy.hpp           |  2 +-
 .../ltfb/random_pairwise_exchange.hpp         | 10 +--
 .../ltfb/regularized_evolution.hpp            |  4 +-
 .../ltfb/termination_criteria.hpp             | 14 ++--
 .../ltfb/truncation_selection_exchange.hpp    |  4 +-
 .../sgd_execution_context.hpp                 | 71 ++++++++--------
 .../sgd_training_algorithm.hpp                | 52 ++++++------
 .../training_algorithm.hpp                    | 32 ++++----
 .../lbann/execution_contexts/CMakeLists.txt   |  8 --
 include/lbann/models/model.hpp                | 48 +++++------
 include/lbann/trainers/trainer.hpp            | 26 +++---
 src/CMakeLists.txt                            |  1 -
 src/callbacks/check_dataset.cpp               |  2 +-
 src/callbacks/check_gradients.cpp             | 24 +++---
 src/callbacks/check_init.cpp                  |  2 +-
 src/callbacks/check_nan.cpp                   | 10 +--
 src/callbacks/checkpoint.cpp                  |  6 +-
 src/callbacks/confusion_matrix.cpp            |  2 +-
 src/callbacks/debug.cpp                       |  2 +-
 src/callbacks/debug_io.cpp                    |  2 +-
 src/callbacks/dump_error_signals.cpp          |  2 +-
 src/callbacks/dump_gradients.cpp              |  2 +-
 .../dump_minibatch_sample_indices.cpp         |  2 +-
 src/callbacks/dump_outputs.cpp                |  2 +-
 src/callbacks/dump_weights.cpp                |  4 +-
 src/callbacks/early_stopping.cpp              |  2 +-
 src/callbacks/learning_rate.cpp               | 12 +--
 src/callbacks/mixup.cpp                       |  2 +-
 src/callbacks/monitor_io.cpp                  |  4 +-
 src/callbacks/print_statistics.cpp            |  4 +-
 src/callbacks/profiler.cpp                    | 12 +--
 src/callbacks/save_images.cpp                 |  2 +-
 src/callbacks/save_model.cpp                  |  2 +-
 src/callbacks/save_topk_models.cpp            |  2 +-
 src/callbacks/summarize_images.cpp            |  6 +-
 src/callbacks/timer.cpp                       |  2 +-
 src/callbacks/variable_minibatch.cpp          |  8 +-
 src/data_readers/data_reader.cpp              |  6 +-
 src/data_readers/data_reader_jag_conduit.cpp  |  4 +-
 .../data_reader_numpy_npz_conduit.cpp         |  2 +-
 src/execution_algorithms/CMakeLists.txt       |  2 +
 .../execution_context.cpp                     | 10 +--
 src/execution_algorithms/factory.cpp          |  6 +-
 src/execution_algorithms/kfac.cpp             | 30 +++----
 .../kfac/execution_context.cpp                | 20 ++---
 src/execution_algorithms/ltfb.cpp             |  6 +-
 .../ltfb/random_pairwise_exchange.cpp         |  4 +-
 .../ltfb/regularized_evolution.cpp            |  4 +-
 .../ltfb/truncation_selection_exchange.cpp    |  4 +-
 .../sgd_execution_context.cpp                 | 34 ++++----
 .../sgd_training_algorithm.cpp                | 80 +++++++++----------
 .../training_algorithm.cpp                    |  6 +-
 .../training_algorithm_factory_test.cpp       |  6 +-
 src/execution_contexts/CMakeLists.txt         |  8 --
 src/layers/data_type_distconv_adapter.cpp     |  2 +-
 src/layers/data_type_layer.cpp                |  6 +-
 src/layers/io/input_layer.cpp                 |  6 +-
 src/layers/layer.cpp                          |  2 +-
 src/layers/transform/evaluation.cpp           |  4 +-
 src/models/model.cpp                          |  4 +-
 src/proto/factories/trainer_factory.cpp       |  2 +-
 src/trainers/trainer.cpp                      | 54 ++++++-------
 78 files changed, 441 insertions(+), 454 deletions(-)
 rename include/lbann/{execution_contexts => execution_algorithms}/execution_context.hpp (79%)
 rename include/lbann/{execution_contexts => execution_algorithms}/sgd_execution_context.hpp (73%)
 delete mode 100644 include/lbann/execution_contexts/CMakeLists.txt
 rename src/{execution_contexts => execution_algorithms}/execution_context.cpp (83%)
 rename src/{execution_contexts => execution_algorithms}/sgd_execution_context.cpp (77%)
 delete mode 100644 src/execution_contexts/CMakeLists.txt

diff --git a/include/lbann/CMakeLists.txt b/include/lbann/CMakeLists.txt
index 06870d34b0f..18fd1d1fbd3 100644
--- a/include/lbann/CMakeLists.txt
+++ b/include/lbann/CMakeLists.txt
@@ -12,7 +12,6 @@ add_subdirectory(callbacks)
 add_subdirectory(data_coordinator)
 add_subdirectory(data_readers)
 add_subdirectory(data_store)
-add_subdirectory(execution_contexts)
 add_subdirectory(io)
 add_subdirectory(layers)
 add_subdirectory(macros)
diff --git a/include/lbann/callbacks/callback.hpp b/include/lbann/callbacks/callback.hpp
index 0f76e6d5f87..d254abee671 100644
--- a/include/lbann/callbacks/callback.hpp
+++ b/include/lbann/callbacks/callback.hpp
@@ -35,7 +35,7 @@
 #include "lbann/utils/description.hpp"
 #include "lbann/utils/memory.hpp"
 #include "lbann/utils/summary.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 
 #include <google/protobuf/message.h>
 
@@ -217,7 +217,7 @@ class callback_base {
   inline std::string get_multi_trainer_ec_model_path(const model& m,
                                                      const std::string& root_dir) {
     std::string dir = get_multi_trainer_path(m, root_dir);
-    const auto& c = static_cast<const sgd_execution_context&>(m.get_execution_context());
+    const auto& c = static_cast<const SGDExecutionContext&>(m.get_execution_context());
     return build_string(dir,
                         c.get_state_string(), '/',
                         m.get_name(), '/');
diff --git a/include/lbann/callbacks/checkpoint.hpp b/include/lbann/callbacks/checkpoint.hpp
index 09c771cdadc..1fbad5cff60 100644
--- a/include/lbann/callbacks/checkpoint.hpp
+++ b/include/lbann/callbacks/checkpoint.hpp
@@ -126,11 +126,11 @@ class checkpoint : public callback_base {
     return *m_active_trainer;
   }
 
-  inline void set_active_training_algorithm(training_algorithm* t){
+  inline void set_active_training_algorithm(TrainingAlgorithm* t){
     m_active_training_algorithm = t;
   }
 
-  inline training_algorithm& get_active_training_algorithm(){
+  inline TrainingAlgorithm& get_active_training_algorithm(){
     if(m_active_training_algorithm == nullptr) {
       LBANN_ERROR("No active training algorithm for the checkpoint callback");
     }
@@ -220,7 +220,7 @@ class checkpoint : public callback_base {
     size_t step);
 private:
   trainer* m_active_trainer;
-  training_algorithm* m_active_training_algorithm;
+  TrainingAlgorithm* m_active_training_algorithm;
   std::string m_checkpoint_dir;
   // If the restart directory is not explicity set, default to the
   // checkpoint directory
diff --git a/include/lbann/callbacks/dump_outputs.hpp b/include/lbann/callbacks/dump_outputs.hpp
index 73bdb9db9d0..431d30bd2a3 100644
--- a/include/lbann/callbacks/dump_outputs.hpp
+++ b/include/lbann/callbacks/dump_outputs.hpp
@@ -83,7 +83,7 @@ class dump_outputs : public callback_base {
     do_dump_outputs(*m, *l);
   }
   void on_evaluate_forward_prop_end(model* m, Layer* l) override {
-    const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+    const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
     if(c.get_step() % m_batch_interval == 0) {
       do_dump_outputs(*m, *l);
     }
diff --git a/include/lbann/data_coordinator/data_coordinator.hpp b/include/lbann/data_coordinator/data_coordinator.hpp
index d4a74c9847b..d44ab028219 100644
--- a/include/lbann/data_coordinator/data_coordinator.hpp
+++ b/include/lbann/data_coordinator/data_coordinator.hpp
@@ -29,7 +29,9 @@
 
 #include "lbann/data_coordinator/data_coordinator_metadata.hpp"
 #include "lbann/utils/dataset.hpp"
-#include "lbann/execution_contexts/execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
+#include "lbann/utils/threads/thread_pool.hpp"
+
 #ifdef LBANN_HAS_DISTCONV
 #include "lbann/data_readers/data_reader_hdf5_legacy.hpp"
 #endif // LBANN_HAS_DISTCONV
@@ -122,7 +124,7 @@ class data_coordinator {
   }
 
   /** Grab the training context of the data coordinator */
-  const execution_context& get_execution_context() const {
+  const ExecutionContext& get_execution_context() const {
     if(m_execution_context == nullptr) {
       LBANN_ERROR("execution context is not set");
     }
@@ -130,8 +132,8 @@ class data_coordinator {
   }
 
   /** Grab the training context of the data coordinator */
-  execution_context& get_execution_context() {
-    return const_cast<execution_context&>(static_cast<const data_coordinator&>(*this).get_execution_context());
+  ExecutionContext& get_execution_context() {
+    return const_cast<ExecutionContext&>(static_cast<const data_coordinator&>(*this).get_execution_context());
   }
 
   /** Return the I/O thread pool */
@@ -380,8 +382,8 @@ class data_coordinator {
 
   // At the start of the epoch, set the execution mode and make sure
   // that each layer points to this model
-  void reset_mode(execution_context& context) {
-    m_execution_context = static_cast<observer_ptr<execution_context>>(&context);
+  void reset_mode(ExecutionContext& context) {
+    m_execution_context = static_cast<observer_ptr<ExecutionContext>>(&context);
   }
 
   /** @name Helper functions to access the dataset statistics */
@@ -506,7 +508,7 @@ class data_coordinator {
   std::mutex dr_mutex;
 
   /** Pointer to the execution context object used for training or evaluating this model */
-  observer_ptr<execution_context> m_execution_context;
+  observer_ptr<ExecutionContext> m_execution_context;
 
   observer_ptr<thread_pool> m_io_thread_pool;
 };
diff --git a/include/lbann/execution_algorithms/batch_functional_inference_algorithm.hpp b/include/lbann/execution_algorithms/batch_functional_inference_algorithm.hpp
index 205dd69361d..1ba9712fc80 100644
--- a/include/lbann/execution_algorithms/batch_functional_inference_algorithm.hpp
+++ b/include/lbann/execution_algorithms/batch_functional_inference_algorithm.hpp
@@ -29,7 +29,7 @@
 
 #include "lbann/callbacks/callback.hpp"
 #include "lbann/data_coordinator/data_coordinator.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/layers/data_type_layer.hpp"
 #include "lbann/layers/io/input_layer.hpp"
 #include "lbann/models/model.hpp"
@@ -89,7 +89,7 @@ class batch_functional_inference_algorithm {
     // Create an SGD_execution_context so that layer.forward_prop can get the
     // mini_batch_size - This should be fixed in the future, when SGD is not so
     // hard-coded into the model & layers
-    auto c = sgd_execution_context(execution_mode::inference, mbs);
+    auto c = SGDExecutionContext(execution_mode::inference, mbs);
     model->reset_mode(c, execution_mode::inference);
 
     // Infer on mini batches
diff --git a/include/lbann/execution_contexts/execution_context.hpp b/include/lbann/execution_algorithms/execution_context.hpp
similarity index 79%
rename from include/lbann/execution_contexts/execution_context.hpp
rename to include/lbann/execution_algorithms/execution_context.hpp
index 3ab73fb5016..63f56b4740a 100644
--- a/include/lbann/execution_contexts/execution_context.hpp
+++ b/include/lbann/execution_algorithms/execution_context.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -24,13 +24,13 @@
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
 
-#ifndef LBANN_EXECUTION_CONTEXT_HPP
-#define LBANN_EXECUTION_CONTEXT_HPP
+#ifndef LBANN_EXECUTION_ALGORITHMS_EXECUTION_CONTEXT_HPP_INCLUDED
+#define LBANN_EXECUTION_ALGORITHMS_EXECUTION_CONTEXT_HPP_INCLUDED
 
 #include "lbann/base.hpp"
-#include "lbann/comm.hpp"
-#include "lbann/io/persist.hpp"
-#include "lbann/utils/threads/thread_pool.hpp"
+
+#include <memory>
+#include <string>
 
 // Forward declaration
 namespace cereal {
@@ -40,20 +40,21 @@ class access;
 namespace lbann {
 
 // Forward-declare this.
+class persist;
 class trainer;
-class training_algorithm;
+class TrainingAlgorithm;
 
-class execution_context
+class ExecutionContext
 {
 public:
   /** Constructor. */
-  execution_context();
+  ExecutionContext();
 
   /** Destructor. */
-  virtual ~execution_context() = default;
+  virtual ~ExecutionContext() = default;
 
   /** Get a "clean" execution_context of the same type. */
-  virtual std::unique_ptr<execution_context> get_new() const = 0;
+  virtual std::unique_ptr<ExecutionContext> get_new() const = 0;
 
   /** @brief Get a string identifying the type of execution context.
    *  @details Should match the training algorithm.
@@ -100,13 +101,13 @@ class execution_context
 protected:
   friend class cereal::access;
   /** Copy constructor. */
-  execution_context(const execution_context& other) = delete;
+  ExecutionContext(const ExecutionContext& other) = delete;
   /** Copy assignment operator. */
-  execution_context& operator=(const execution_context& other) = delete;
+  ExecutionContext& operator=(const ExecutionContext& other) = delete;
   /** Move constructor. */
-  execution_context(execution_context&& other) = default;
+  ExecutionContext(ExecutionContext&& other) = default;
   /** Move assignment operator. */
-  execution_context& operator=(execution_context&& other) = default;
+  ExecutionContext& operator=(ExecutionContext&& other) = default;
 
 private:
 
@@ -123,13 +124,14 @@ class execution_context
  *  algorithm, and specifically its execution context, but can
  *  otherwise be anything meaningful in the context of that algorithm.
 */
-class termination_criteria
+class TerminationCriteria
 {
 public:
-  termination_criteria() = default;
-  virtual ~termination_criteria() = default;
-  virtual bool operator()(execution_context const& c) const = 0;
+  TerminationCriteria() = default;
+  virtual ~TerminationCriteria() = default;
+  virtual bool operator()(ExecutionContext const& c) const = 0;
 };
 
 } // namespace lbann
-#endif // LBANN_EXECUTION_CONTEXT_HPP
+
+#endif // LBANN_EXECUTION_ALGORITHMS_EXECUTION_CONTEXT_HPP_INCLUDED
diff --git a/include/lbann/execution_algorithms/factory.hpp b/include/lbann/execution_algorithms/factory.hpp
index f2bd8720874..580e1fe985c 100644
--- a/include/lbann/execution_algorithms/factory.hpp
+++ b/include/lbann/execution_algorithms/factory.hpp
@@ -27,7 +27,7 @@
 #define LBANN_EXECUTION_ALGORITHMS_FACTORY_HPP_INCLUDED
 
 #include "lbann/execution_algorithms/training_algorithm.hpp"
-#include "lbann/execution_contexts/execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
 #include "lbann/proto/helpers.hpp"
 #include "lbann/utils/factory.hpp"
 #include "lbann/utils/factory_error_policies.hpp"
@@ -47,9 +47,9 @@ namespace lbann {
  *         messages.
  */
 using TrainingAlgorithmFactory = generic_factory<
-  training_algorithm,
+  TrainingAlgorithm,
   std::string,
-  proto::generate_builder_type<training_algorithm,
+  proto::generate_builder_type<TrainingAlgorithm,
                                google::protobuf::Message const&>>;
 
 /** @brief The builder type used to create a new training algorithm.
@@ -74,8 +74,8 @@ void register_new_training_algorithm(TrainingAlgorithmKey key,
  *  @return A newly-constructed training algorithm.
  */
 template <>
-std::unique_ptr<lbann::training_algorithm>
-lbann::make_abstract<lbann::training_algorithm>(
+std::unique_ptr<lbann::TrainingAlgorithm>
+lbann::make_abstract<lbann::TrainingAlgorithm>(
   google::protobuf::Message const& params);
 
 #endif // LBANN_EXECUTION_ALGORITHMS_FACTORY_HPP_INCLUDED
diff --git a/include/lbann/execution_algorithms/kfac.hpp b/include/lbann/execution_algorithms/kfac.hpp
index d1d4db990c6..30840350071 100644
--- a/include/lbann/execution_algorithms/kfac.hpp
+++ b/include/lbann/execution_algorithms/kfac.hpp
@@ -30,7 +30,7 @@
 #include "lbann/execution_algorithms/factory.hpp"
 #include "lbann/execution_algorithms/training_algorithm.hpp"
 #include "lbann/execution_algorithms/kfac/execution_context.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/models/directed_acyclic_graph.hpp"
 #include "lbann/trainers/trainer.hpp"
 #include "lbann/utils/cloneable.hpp"
@@ -57,13 +57,13 @@ namespace lbann {
  *  deep convolutional neural networks." Proceedings of the IEEE
  *  Conference on Computer Vision and Pattern Recognition. 2019.
  */
-class KFAC final : public Cloneable<KFAC, training_algorithm>
+class KFAC final : public Cloneable<KFAC, TrainingAlgorithm>
 {
-  using BaseType = Cloneable<KFAC, training_algorithm>;
+  using BaseType = Cloneable<KFAC, TrainingAlgorithm>;
 
 public:
-  using TermCriteriaType = sgd_termination_criteria;
-  using ExeContextType = kfac::ExecutionContext;
+  using TermCriteriaType = SGDTerminationCriteria;
+  using ExeContextType = kfac::KFACExecutionContext;
 
 public:
   /** @name Life-cycle management */
@@ -106,7 +106,7 @@ class KFAC final : public Cloneable<KFAC, training_algorithm>
    *  @param[in,out] dc The data source for training.
    *  @param[in] mode Completely superfluous.
    */
-  void apply(execution_context& context,
+  void apply(ExecutionContext& context,
              model& m,
              data_coordinator& dc,
              execution_mode mode) final;
@@ -161,8 +161,8 @@ class KFAC final : public Cloneable<KFAC, training_algorithm>
   /** @brief Covariant return-friendly implementation of
    *         `get_new_exection_context()`.
    */
-  kfac::ExecutionContext* do_get_new_execution_context() const final;
-  
+  kfac::KFACExecutionContext* do_get_new_execution_context() const final;
+
   void send_recv_inverse_matrices(
     ExeContextType& context,
     lbann_comm *comm);
@@ -241,7 +241,7 @@ class KFAC final : public Cloneable<KFAC, training_algorithm>
   bool m_has_kronecker_inverse=false;
   size_t m_compute_interval;
 
-  El::Matrix<double, El::Device::CPU> m_inverse_matrices_size; 
+  El::Matrix<double, El::Device::CPU> m_inverse_matrices_size;
 
 }; // class KFAC
 
diff --git a/include/lbann/execution_algorithms/kfac/execution_context.hpp b/include/lbann/execution_algorithms/kfac/execution_context.hpp
index 6d63265a880..f6d1729c8b4 100644
--- a/include/lbann/execution_algorithms/kfac/execution_context.hpp
+++ b/include/lbann/execution_algorithms/kfac/execution_context.hpp
@@ -26,8 +26,8 @@
 #ifndef LBANN_EXECUTION_ALGORITHMS_KFAC_EXECUTION_CONTEXT_HPP_INCLUDED
 #define LBANN_EXECUTION_ALGORITHMS_KFAC_EXECUTION_CONTEXT_HPP_INCLUDED
 
-#include "lbann/execution_contexts/execution_context.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/execution_algorithms/kfac/kfac_block.hpp"
 #include "lbann/execution_algorithms/kfac/kfac_util.hpp"
 #include <memory>
@@ -53,28 +53,28 @@ constexpr El::Device Device = El::Device::CPU;
 /** @class ExecutionContext
  *  @brief The execution context for an KFAC algorithm.
  */
-class ExecutionContext final : public lbann::execution_context
+class KFACExecutionContext final : public lbann::ExecutionContext
 {
 public:
   friend class ::lbann::KFAC;
 
   /** Constructor. */
-  ExecutionContext(
+  KFACExecutionContext(
     size_t mini_batch_size,
     double damping_act,
     double damping_err,
     double damping_bn_act,
     double damping_bn_err);
   /** Destructor. */
-  ~ExecutionContext() = default;
+  ~KFACExecutionContext() = default;
 
   /** Copy constructor -- deleted. */
-  ExecutionContext(const ExecutionContext& other) = delete;
+  KFACExecutionContext(const KFACExecutionContext& other) = delete;
   /** Copy assignment operator -- deleted. */
-  ExecutionContext& operator=(const ExecutionContext& other) = delete;
+  KFACExecutionContext& operator=(const KFACExecutionContext& other) = delete;
 
   /** Get a "clean" execution_context of the same type. */
-  std::unique_ptr<lbann::execution_context> get_new() const override;
+  std::unique_ptr<lbann::ExecutionContext> get_new() const override;
 
   /** @brief Get a string identifying the type of execution context.
    *  @details Should match the training algorithm.
@@ -86,7 +86,7 @@ class ExecutionContext final : public lbann::execution_context
   std::string get_state_string() const noexcept override;
 
   /** @brief Return execution context for SGD-family training algorithm. */
-  inline sgd_execution_context& get_sgd_execution_context() noexcept
+  inline SGDExecutionContext& get_sgd_execution_context() noexcept
   {
     return m_sgd_execution_context;
   }
@@ -116,7 +116,7 @@ class ExecutionContext final : public lbann::execution_context
 
 private:
 
-  sgd_execution_context m_sgd_execution_context;
+  SGDExecutionContext m_sgd_execution_context;
 
   /** @brief The current damping values. */
   double m_damping_act, m_damping_err,
diff --git a/include/lbann/execution_algorithms/kfac/kfac_block.hpp b/include/lbann/execution_algorithms/kfac/kfac_block.hpp
index 250210be994..4eb36f739a3 100644
--- a/include/lbann/execution_algorithms/kfac/kfac_block.hpp
+++ b/include/lbann/execution_algorithms/kfac/kfac_block.hpp
@@ -34,7 +34,7 @@ namespace lbann {
 
 // Forward declaration
 namespace kfac {
-class ExecutionContext;
+class KFACExecutionContext;
 }
 
 /** A building block for K-FAC.
@@ -46,7 +46,7 @@ class kfac_block {
   /** Constructor.
    */
   kfac_block(Layer* layer,
-             kfac::ExecutionContext* context,
+             kfac::KFACExecutionContext* context,
              size_t layer_id,
              size_t inverse_proc_rank)
       : m_layer(layer),
@@ -104,7 +104,7 @@ class kfac_block {
     LBANN_ERROR("this function should be called via a sub-class.");
   }
 
-  /** @brief Copies activations, errors, and weights from model class to 
+  /** @brief Copies activations, errors, and weights from model class to
   private variables to be used in KFAC computation. */
   virtual void initialize_activations_and_errors(
       lbann_comm* comm,
@@ -128,10 +128,10 @@ class kfac_block {
 
   /** @brief Get inverse matrices size (offset). */
   virtual int
-  get_inverse_matrices_size(lbann_comm *comm) = 0; 
+  get_inverse_matrices_size(lbann_comm *comm) = 0;
 
   /** @brief Get inverse matrices size vector */
-  virtual std::vector<int> 
+  virtual std::vector<int>
   get_inverse_matrices_size_vector(lbann_comm *comm) = 0;
 
   /** @brief Get inverse matrices size vector */
@@ -217,13 +217,13 @@ class kfac_block {
   /** @brief Translatebetweengrid  funciton has a basic implementation for STAR,STAR
    * distributed matrices. Therefore, using local matrices for weights  */
   std::vector<std::unique_ptr<El::Matrix<DataType, Device>>> m_weight_values;
-  
+
 
  private:
 
   /** @brief The execution context that created this block.
    *  TODO: Use its own workspace and remove this pointer. */
-  kfac::ExecutionContext* m_context;
+  kfac::KFACExecutionContext* m_context;
 
 };
 
diff --git a/include/lbann/execution_algorithms/kfac/kfac_block_bn.hpp b/include/lbann/execution_algorithms/kfac/kfac_block_bn.hpp
index 55f93c6e50f..3c8934b61a3 100644
--- a/include/lbann/execution_algorithms/kfac/kfac_block_bn.hpp
+++ b/include/lbann/execution_algorithms/kfac/kfac_block_bn.hpp
@@ -60,7 +60,7 @@ class kfac_block_bn: public kfac_block<Device> {
   /** Constructor.
    */
   kfac_block_bn(Layer* layer,
-                kfac::ExecutionContext* context,
+                kfac::KFACExecutionContext* context,
                 size_t layer_id,
                 size_t inverse_proc_rank)
       : kfac_block<Device>(layer, context, layer_id, inverse_proc_rank) {
@@ -152,7 +152,7 @@ class kfac_block_bn: public kfac_block<Device> {
   }
 
   /** @brief Get inverse matrices size vector */
-  std::vector<int> 
+  std::vector<int>
   get_inverse_matrices_size_vector(lbann_comm *comm) override
   {
     LBANN_ERROR("Sub-grid parallelism  is not implemented for BN layer");
diff --git a/include/lbann/execution_algorithms/kfac/kfac_block_fc_conv.hpp b/include/lbann/execution_algorithms/kfac/kfac_block_fc_conv.hpp
index 46387e0c4f0..b870a0c8eed 100644
--- a/include/lbann/execution_algorithms/kfac/kfac_block_fc_conv.hpp
+++ b/include/lbann/execution_algorithms/kfac/kfac_block_fc_conv.hpp
@@ -75,7 +75,7 @@ class kfac_block_fc_conv: public kfac_block<Device> {
   /** Constructor.
    */
   kfac_block_fc_conv(Layer* layer,
-                     kfac::ExecutionContext* context,
+                     kfac::KFACExecutionContext* context,
                      const size_t layer_id,
                      const size_t inverse_proc_rank,
                      const bool is_conv)
@@ -156,7 +156,7 @@ class kfac_block_fc_conv: public kfac_block<Device> {
       lbann_comm* comm,
       int num_local_activations,
       int num_local_errors,
-      int num_weights) override;  
+      int num_weights) override;
 
   const std::vector<El::AbstractMatrix<DataType>*>
   get_preconditioned_grad_buffers() override;
diff --git a/include/lbann/execution_algorithms/kfac/kfac_block_gru.hpp b/include/lbann/execution_algorithms/kfac/kfac_block_gru.hpp
index baefebbbad8..0e16543d800 100644
--- a/include/lbann/execution_algorithms/kfac/kfac_block_gru.hpp
+++ b/include/lbann/execution_algorithms/kfac/kfac_block_gru.hpp
@@ -114,7 +114,7 @@ class kfac_block_gru: public kfac_block<Device> {
   /** Constructor.
    */
   kfac_block_gru(Layer* layer,
-                 kfac::ExecutionContext* context,
+                 kfac::KFACExecutionContext* context,
                  size_t layer_id,
                  size_t inverse_proc_rank)
       : kfac_block<Device>(layer, context, layer_id, inverse_proc_rank) {
@@ -177,7 +177,7 @@ class kfac_block_gru: public kfac_block<Device> {
   int get_inverse_matrices(
       El::Matrix<DataType, Device>& output,
       int offset) override;
-  
+
 
   /** @brief Get inverse matrices size (offset). */
   int get_inverse_matrices_size(lbann_comm *comm) override;
@@ -190,7 +190,7 @@ class kfac_block_gru: public kfac_block<Device> {
   void send_recv_weights(lbann_comm *comm);
 
   /** @brief Get inverse matrices size vector */
-  std::vector<int> 
+  std::vector<int>
   get_inverse_matrices_size_vector(lbann_comm *comm) override
   {
     LBANN_ERROR("This function is not yet implemented for GRU layer");
@@ -203,7 +203,7 @@ class kfac_block_gru: public kfac_block<Device> {
     LBANN_ERROR("This function is not yet implemented for GRU layer");
   }
 
-    
+
 
   const std::vector<El::AbstractMatrix<DataType>*>
   get_preconditioned_grad_buffers() override;
@@ -229,7 +229,7 @@ class kfac_block_gru: public kfac_block<Device> {
   void get_weight_matrix(
       kfac_gru_util::weight_type matrix_type,
       El::Matrix<DataType, Device>& view);
-  
+
   void get_gradient_matrix(
       kfac_gru_util::weight_type matrix_type,
       El::Matrix<DataType, Device>& view);
diff --git a/include/lbann/execution_algorithms/ltfb.hpp b/include/lbann/execution_algorithms/ltfb.hpp
index e12c04d1e9d..b06685538f1 100644
--- a/include/lbann/execution_algorithms/ltfb.hpp
+++ b/include/lbann/execution_algorithms/ltfb.hpp
@@ -64,13 +64,13 @@ namespace lbann {
  *  then do some other stuff, this class can certainly serve as a
  *  useful guide, but is not likely to be the out-of-the-box solution.
  */
-class LTFB final : public Cloneable<LTFB, training_algorithm>
+class LTFB final : public Cloneable<LTFB, TrainingAlgorithm>
 {
-  using BaseType = Cloneable<LTFB, training_algorithm>;
+  using BaseType = Cloneable<LTFB, TrainingAlgorithm>;
 
 public:
-  using TermCriteriaType = ltfb::TerminationCriteria;
-  using ExeContextType = ltfb::ExecutionContext;
+  using TermCriteriaType = ltfb::LTFBTerminationCriteria;
+  using ExeContextType = ltfb::LTFBExecutionContext;
 
 public:
   /** @name Life-cycle management */
@@ -81,9 +81,9 @@ class LTFB final : public Cloneable<LTFB, training_algorithm>
    *  @param meta_learning_strategy The postprocessing algorithm.
    */
   LTFB(std::string name,
-       std::unique_ptr<training_algorithm> local_training_algorithm,
+       std::unique_ptr<TrainingAlgorithm> local_training_algorithm,
        std::unique_ptr<ltfb::MetaLearningStrategy> meta_learning_strategy,
-       ltfb::TerminationCriteria stopping_criteria)
+       ltfb::LTFBTerminationCriteria stopping_criteria)
     : BaseType{std::move(name)}, m_local_algo{std::move(
                                    local_training_algorithm)},
       m_meta_learning_strategy{std::move(meta_learning_strategy)},
@@ -110,7 +110,7 @@ class LTFB final : public Cloneable<LTFB, training_algorithm>
    *  @param[in,out] dc The data source for training.
    *  @param[in] mode Completely superfluous.
    */
-  void apply(execution_context& context,
+  void apply(ExecutionContext& context,
              model& m,
              data_coordinator& dc,
              execution_mode mode) final;
@@ -119,20 +119,20 @@ class LTFB final : public Cloneable<LTFB, training_algorithm>
   /** @brief Covariant return-friendly implementation of
    *         `get_new_exection_context()`.
    */
-  ltfb::ExecutionContext* do_get_new_execution_context() const final
+  ltfb::LTFBExecutionContext* do_get_new_execution_context() const final
   {
-    return new ltfb::ExecutionContext();
+    return new ltfb::LTFBExecutionContext();
   }
 
 private:
   /** @brief The training algorithm for trainer-local training. */
-  std::unique_ptr<training_algorithm> m_local_algo;
+  std::unique_ptr<TrainingAlgorithm> m_local_algo;
 
   /** @brief The strategy for postprocessing local training outputs. */
   std::unique_ptr<ltfb::MetaLearningStrategy> m_meta_learning_strategy;
 
   /** @brief The LTFB stopping criteria. */
-  ltfb::TerminationCriteria m_termination_criteria;
+  ltfb::LTFBTerminationCriteria m_termination_criteria;
 
 }; // class LTFB
 
diff --git a/include/lbann/execution_algorithms/ltfb/execution_context.hpp b/include/lbann/execution_algorithms/ltfb/execution_context.hpp
index ea23f3f7672..8145458cfc1 100644
--- a/include/lbann/execution_algorithms/ltfb/execution_context.hpp
+++ b/include/lbann/execution_algorithms/ltfb/execution_context.hpp
@@ -26,7 +26,9 @@
 #ifndef LBANN_EXECUTION_ALGORITHMS_LTFB_EXECUTION_CONTEXT_HPP_INCLUDED
 #define LBANN_EXECUTION_ALGORITHMS_LTFB_EXECUTION_CONTEXT_HPP_INCLUDED
 
-#include "lbann/execution_contexts/execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
+#include "lbann/utils/exception.hpp"
+
 #include <memory>
 #include <string>
 
@@ -42,16 +44,16 @@ namespace ltfb {
  *  object. My inclination is that it really doesn't matter and either
  *  would work.
  */
-class ExecutionContext final : public lbann::execution_context
+class LTFBExecutionContext final : public lbann::ExecutionContext
 {
 public:
-  ExecutionContext() = default;
-  ~ExecutionContext() = default;
+  LTFBExecutionContext() = default;
+  ~LTFBExecutionContext() = default;
 
   /** Get a "clean" execution_context of the same type. */
-  std::unique_ptr<lbann::execution_context> get_new() const override
+  std::unique_ptr<lbann::ExecutionContext> get_new() const override
   {
-    return std::make_unique<ExecutionContext>();
+    return std::make_unique<LTFBExecutionContext>();
   }
 
   /** @brief Get a string identifying the type of execution context.
diff --git a/include/lbann/execution_algorithms/ltfb/meta_learning_strategy.hpp b/include/lbann/execution_algorithms/ltfb/meta_learning_strategy.hpp
index 79f4f85a4e7..23fcf38d0eb 100644
--- a/include/lbann/execution_algorithms/ltfb/meta_learning_strategy.hpp
+++ b/include/lbann/execution_algorithms/ltfb/meta_learning_strategy.hpp
@@ -76,7 +76,7 @@ class MetaLearningStrategy
    *  @param[in,out] dc The data coordinator for this trainer.
    */
   virtual void select_next(model& m,
-                           ltfb::ExecutionContext& ctxt,
+                           ltfb::LTFBExecutionContext& ctxt,
                            data_coordinator& dc) const = 0;
 }; // class MetaLearningStrategy
 
diff --git a/include/lbann/execution_algorithms/ltfb/random_pairwise_exchange.hpp b/include/lbann/execution_algorithms/ltfb/random_pairwise_exchange.hpp
index faa4179a2c3..313d2ab30d6 100644
--- a/include/lbann/execution_algorithms/ltfb/random_pairwise_exchange.hpp
+++ b/include/lbann/execution_algorithms/ltfb/random_pairwise_exchange.hpp
@@ -159,13 +159,13 @@ class RandomPairwiseExchange final
    *  @param[in,out] dc The data source for the tournament.
    */
   void select_next(model& m,
-                   ltfb::ExecutionContext& ctxt,
+                   ltfb::LTFBExecutionContext& ctxt,
                    data_coordinator& dc) const final;
 
 private:
   /** @brief Get the value of the given metric from the model. */
   std::unordered_map<std::string, EvalType>
-  evaluate_model(model& m, ExecutionContext& ctxt, data_coordinator& dc) const;
+  evaluate_model(model& m, LTFBExecutionContext& ctxt, data_coordinator& dc) const;
   /** @brief Generate a new trainer partner from the comm. */
   El::Int get_partner_trainer(lbann_comm const& c) const noexcept;
   /** @brief Evaluate the output of two models according to the input
@@ -202,12 +202,12 @@ class RandomPairwiseExchange final
 
   /** @brief The strategy for mutation of a model
    *
-   *  When a trainer loses in a LTFB tournament, the winning model is 
+   *  When a trainer loses in a LTFB tournament, the winning model is
    *  copied over to it and this mutation strategy is applied to the
    *  copied model to explore a new model. This is relevant to neural
-   *  architecture search (NAS). 
+   *  architecture search (NAS).
    */
-  std::unique_ptr<MutationStrategy> m_mutate_algo;  
+  std::unique_ptr<MutationStrategy> m_mutate_algo;
 
 }; // class RandomPairwiseExchange
 
diff --git a/include/lbann/execution_algorithms/ltfb/regularized_evolution.hpp b/include/lbann/execution_algorithms/ltfb/regularized_evolution.hpp
index bb9c2665d59..a23bf39b794 100644
--- a/include/lbann/execution_algorithms/ltfb/regularized_evolution.hpp
+++ b/include/lbann/execution_algorithms/ltfb/regularized_evolution.hpp
@@ -67,13 +67,13 @@ class RegularizedEvolution final
   RegularizedEvolution(RegularizedEvolution const& other);
 
   void select_next(model& m,
-                   ltfb::ExecutionContext& ctxt,
+                   ltfb::LTFBExecutionContext& ctxt,
                    data_coordinator& dc) const final;
 
 private:
   /** @brief Get the value of the given metric from the model. */
   EvalType
-  evaluate_model(model& m, ExecutionContext& ctxt, data_coordinator& dc) const;
+  evaluate_model(model& m, LTFBExecutionContext& ctxt, data_coordinator& dc) const;
 
 private:
   /** @brief The strategy for mutation of a model
diff --git a/include/lbann/execution_algorithms/ltfb/termination_criteria.hpp b/include/lbann/execution_algorithms/ltfb/termination_criteria.hpp
index de2e8744635..86f1db708c7 100644
--- a/include/lbann/execution_algorithms/ltfb/termination_criteria.hpp
+++ b/include/lbann/execution_algorithms/ltfb/termination_criteria.hpp
@@ -27,7 +27,7 @@
 #define LBANN_EXECUTION_ALGORITHMS_LTFB_TERMINATION_CRITERIA_HPP_INCLUDED
 
 #include "lbann/execution_algorithms/ltfb/execution_context.hpp"
-#include "lbann/execution_contexts/execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
 
 namespace lbann {
 namespace ltfb {
@@ -37,19 +37,19 @@ namespace ltfb {
  *
  *  An object here needs to manage
  */
-class TerminationCriteria final : public lbann::termination_criteria
+class LTFBTerminationCriteria final : public lbann::TerminationCriteria
 {
 public:
-  TerminationCriteria(size_t max_metalearning_steps)
+  LTFBTerminationCriteria(size_t max_metalearning_steps)
     : m_max_metalearning_steps{max_metalearning_steps}
   {}
-  ~TerminationCriteria() = default;
-  bool operator()(execution_context const& c) const final
+  ~LTFBTerminationCriteria() = default;
+  bool operator()(ExecutionContext const& c) const final
   {
-    return this->operator()(dynamic_cast<ExecutionContext const&>(c));
+    return this->operator()(dynamic_cast<LTFBExecutionContext const&>(c));
   }
   /** @brief Decide if the criteria are fulfilled. */
-  bool operator()(ExecutionContext const& exe_state) const noexcept
+  bool operator()(LTFBExecutionContext const& exe_state) const noexcept
   {
     return exe_state.get_step() >= m_max_metalearning_steps;
   }
diff --git a/include/lbann/execution_algorithms/ltfb/truncation_selection_exchange.hpp b/include/lbann/execution_algorithms/ltfb/truncation_selection_exchange.hpp
index 7018d1f47eb..ed4817522e5 100644
--- a/include/lbann/execution_algorithms/ltfb/truncation_selection_exchange.hpp
+++ b/include/lbann/execution_algorithms/ltfb/truncation_selection_exchange.hpp
@@ -95,13 +95,13 @@ class TruncationSelectionExchange final
    *  @param[in,out] dc The data source for the tournament.
    */
   void select_next(model& m,
-                   ltfb::ExecutionContext& ctxt,
+                   ltfb::LTFBExecutionContext& ctxt,
                    data_coordinator& dc) const final;
 
 private:
   /** @brief Get the value of the given metric from the model. */
   EvalType
-  evaluate_model(model& m, ExecutionContext& ctxt, data_coordinator& dc) const;
+  evaluate_model(model& m, LTFBExecutionContext& ctxt, data_coordinator& dc) const;
 
 private:
   /** @brief The list of metric/strategy pairs.
diff --git a/include/lbann/execution_contexts/sgd_execution_context.hpp b/include/lbann/execution_algorithms/sgd_execution_context.hpp
similarity index 73%
rename from include/lbann/execution_contexts/sgd_execution_context.hpp
rename to include/lbann/execution_algorithms/sgd_execution_context.hpp
index 402a23e715a..718d0e39be7 100644
--- a/include/lbann/execution_contexts/sgd_execution_context.hpp
+++ b/include/lbann/execution_algorithms/sgd_execution_context.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -27,43 +27,40 @@
 #ifndef LBANN_SGD_EXECUTION_CONTEXT_HPP
 #define LBANN_SGD_EXECUTION_CONTEXT_HPP
 
-#include "lbann/base.hpp"
-#include "lbann/execution_contexts/execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
 #include "lbann/utils/cloneable.hpp"
+#include "lbann/utils/exception.hpp"
 #include "lbann/utils/timer.hpp"
 
-#include <cstddef>
-#include <limits>
-
 namespace lbann {
 
 /** @brief SGD Uses the step to track the Current mini-batch step for
  *  execution mode.
  *  @details Step counts are not reset after each epoch.
  */
-class sgd_execution_context final : public execution_context
+class SGDExecutionContext final : public ExecutionContext
 {
 public:
   /** Constructor. */
-  sgd_execution_context(execution_mode mode, size_t mini_batch_size);
+  SGDExecutionContext(execution_mode mode, size_t mini_batch_size);
   /** Destructor. */
-  virtual ~sgd_execution_context() = default;
+  virtual ~SGDExecutionContext() = default;
 
   /** Move constructor. */
-  sgd_execution_context(sgd_execution_context&& other) = default;
+  SGDExecutionContext(SGDExecutionContext&& other) = default;
   /** Move assignment operator. */
-  sgd_execution_context& operator=(sgd_execution_context&& other) = default;
+  SGDExecutionContext& operator=(SGDExecutionContext&& other) = default;
   /** @brief Get a clean sgd_execution_context. */
 
   /** Copy constructor -- deleted. */
-  sgd_execution_context(const sgd_execution_context& other) = delete;
+  SGDExecutionContext(const SGDExecutionContext& other) = delete;
   /** Copy assignment operator -- deleted. */
-  sgd_execution_context&
-  operator=(const sgd_execution_context& other) = delete;
+  SGDExecutionContext&
+  operator=(const SGDExecutionContext& other) = delete;
 
-  std::unique_ptr<execution_context> get_new() const override
+  std::unique_ptr<ExecutionContext> get_new() const override
   {
-    return make_unique<sgd_execution_context>(execution_mode::invalid, 0UL);
+    return std::make_unique<SGDExecutionContext>(execution_mode::invalid, 0UL);
   }
 
   /** Archive for checkpoint and restart */
@@ -143,7 +140,7 @@ class sgd_execution_context final : public execution_context
 
 private:
   friend class cereal::access;
-  sgd_execution_context() = default;
+  SGDExecutionContext() = default;
 
 private:
   /** @brief Timer tracking execution time. */
@@ -168,19 +165,19 @@ class sgd_execution_context final : public execution_context
 };
 
 /** @brief Base class for SGD stopping. */
-class sgd_termination_criteria
-  : public termination_criteria,
-    public Cloneable<HasAbstractFunction<sgd_termination_criteria>>
+class SGDTerminationCriteria
+  : public TerminationCriteria,
+    public Cloneable<HasAbstractFunction<SGDTerminationCriteria>>
 {
 public:
-  sgd_termination_criteria() = default;
-  virtual ~sgd_termination_criteria() = default;
-  bool operator()(execution_context const& c_in) const final {
-    auto const& c = dynamic_cast<sgd_execution_context const&>(c_in);
+  SGDTerminationCriteria() = default;
+  virtual ~SGDTerminationCriteria() = default;
+  bool operator()(ExecutionContext const& c_in) const final {
+    auto const& c = dynamic_cast<SGDExecutionContext const&>(c_in);
     return c.get_early_stop() || this->is_done(c);
   }
 private:
-  virtual bool is_done(sgd_execution_context const& c) const noexcept = 0;
+  virtual bool is_done(SGDExecutionContext const& c) const noexcept = 0;
 };
 
 /** @brief Stop SGD based on a fixed batch count.
@@ -188,16 +185,16 @@ class sgd_termination_criteria
  *  The training algorithm still tracks the epoch count for other
  *  parts of the code (e.g. at_epoch_begin/end callbacks).
  */
-class batch_termination_criteria
-  : public Cloneable<batch_termination_criteria, sgd_termination_criteria>
+class BatchTerminationCriteria
+  : public Cloneable<BatchTerminationCriteria, SGDTerminationCriteria>
 {
 public:
-  batch_termination_criteria(size_t num_batches)
+  BatchTerminationCriteria(size_t num_batches)
     : m_max_batches{num_batches}
   {}
 
 private:
-  bool is_done(sgd_execution_context const& c) const noexcept final {
+  bool is_done(SGDExecutionContext const& c) const noexcept final {
     return c.get_step() >= m_max_batches;
   }
 
@@ -205,16 +202,16 @@ class batch_termination_criteria
   size_t m_max_batches;
 };
 
-class epoch_termination_criteria
-  : public Cloneable<epoch_termination_criteria, sgd_termination_criteria>
+class EpochTerminationCriteria
+  : public Cloneable<EpochTerminationCriteria, SGDTerminationCriteria>
 {
 public:
-  epoch_termination_criteria(size_t num_epochs)
+  EpochTerminationCriteria(size_t num_epochs)
     : m_max_epochs{num_epochs}
   {}
 
 private:
-  bool is_done(sgd_execution_context const& c) const noexcept final {
+  bool is_done(SGDExecutionContext const& c) const noexcept final {
     return c.get_epoch() >= m_max_epochs;
   }
 
@@ -222,16 +219,16 @@ class epoch_termination_criteria
   size_t m_max_epochs;
 };
 
-class seconds_termination_criteria
-  : public Cloneable<seconds_termination_criteria, sgd_termination_criteria>
+class SecondsTerminationCriteria
+  : public Cloneable<SecondsTerminationCriteria, SGDTerminationCriteria>
 {
 public:
-  seconds_termination_criteria(double seconds)
+  SecondsTerminationCriteria(double seconds)
     : m_max_seconds{seconds}
   {}
 
 private:
-  bool is_done(sgd_execution_context const& c) const noexcept final;
+  bool is_done(SGDExecutionContext const& c) const noexcept final;
 
 private:
   double m_max_seconds;
diff --git a/include/lbann/execution_algorithms/sgd_training_algorithm.hpp b/include/lbann/execution_algorithms/sgd_training_algorithm.hpp
index 64f36f5f799..cf1d29ec7ba 100644
--- a/include/lbann/execution_algorithms/sgd_training_algorithm.hpp
+++ b/include/lbann/execution_algorithms/sgd_training_algorithm.hpp
@@ -30,8 +30,8 @@
 #include "lbann/base.hpp"
 #include "lbann/execution_algorithms/factory.hpp"
 #include "lbann/execution_algorithms/training_algorithm.hpp"
-#include "lbann/execution_contexts/execution_context.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/utils/cloneable.hpp"
 #include "lbann/utils/memory.hpp"
 #include <google/protobuf/message.h>
@@ -40,29 +40,29 @@
 namespace lbann {
 
 /** @brief Base class for LBANN SGD-family training algorithms. */
-class sgd_training_algorithm
-  : public Cloneable<sgd_training_algorithm, training_algorithm>
+class SGDTrainingAlgorithm
+  : public Cloneable<SGDTrainingAlgorithm, TrainingAlgorithm>
 {
-  using BaseType = Cloneable<sgd_training_algorithm, training_algorithm>;
+  using BaseType = Cloneable<SGDTrainingAlgorithm, TrainingAlgorithm>;
 
 public:
   /** @brief Construct with a name. */
-  sgd_training_algorithm(std::string name,
-                         std::unique_ptr<sgd_termination_criteria> stop)
+  SGDTrainingAlgorithm(std::string name,
+                         std::unique_ptr<SGDTerminationCriteria> stop)
     : BaseType{std::move(name)},
       m_stopping_criteria{std::move(stop)},
       m_validation_context{execution_mode::validation, 1UL},
       m_validation_epochs{1UL}
   {}
 
-  sgd_training_algorithm(const sgd_training_algorithm& other);
-  sgd_training_algorithm&
-  operator=(const sgd_training_algorithm& other);
+  SGDTrainingAlgorithm(const SGDTrainingAlgorithm& other);
+  SGDTrainingAlgorithm&
+  operator=(const SGDTrainingAlgorithm& other);
 
-  sgd_training_algorithm(sgd_training_algorithm&& other) = default;
-  sgd_training_algorithm& operator=(sgd_training_algorithm&& other) = default;
+  SGDTrainingAlgorithm(SGDTrainingAlgorithm&& other) = default;
+  SGDTrainingAlgorithm& operator=(SGDTrainingAlgorithm&& other) = default;
 
-  virtual ~sgd_training_algorithm() = default;
+  virtual ~SGDTrainingAlgorithm() = default;
   /** Copy training_algorithm. */
   //  virtual sgd_training_algorithm* copy() const = default;
 
@@ -74,23 +74,23 @@ class sgd_training_algorithm
 
   /** Apply the training algorithm to the model with the provided
       context and execution mode */
-  void apply(execution_context& c,
+  void apply(ExecutionContext& c,
              model& model,
              data_coordinator& dc,
              execution_mode mode) override;
 
   /** Train a model using an iterative SGD solver. */
-  void train(sgd_execution_context& c,
+  void train(SGDExecutionContext& c,
              model& model,
              data_coordinator& dc,
-             sgd_termination_criteria const& term);
+             SGDTerminationCriteria const& term);
 
   /** Evaluate a model using the forward pass of an SGD solver. */
-  void evaluate(sgd_execution_context& c,
+  void evaluate(SGDExecutionContext& c,
                 model& model,
                 data_coordinator& dc,
                 execution_mode mode,
-                sgd_termination_criteria const& term);
+                SGDTerminationCriteria const& term);
 
   /** @brief Get a default-initialized execution context.
    *  @note This method participates in the
@@ -98,7 +98,7 @@ class sgd_training_algorithm
    *        it hides the base-class method to give the illusion of a
    *        covariant return.
    */
-  std::unique_ptr<sgd_execution_context>
+  std::unique_ptr<SGDExecutionContext>
   get_new_execution_context() const
   {
     return to_unique_ptr(this->do_get_new_execution_context());
@@ -106,12 +106,12 @@ class sgd_training_algorithm
 
 protected:
   /** Train model on one step / mini-batch of an SGD forward pass */
-  virtual bool train_mini_batch(sgd_execution_context& c,
+  virtual bool train_mini_batch(SGDExecutionContext& c,
                                 model& model,
                                 data_coordinator& dc);
 
   /** Evaluate model on one step / mini-batch of an SGD forward pass */
-  virtual bool evaluate_mini_batch(sgd_execution_context& c,
+  virtual bool evaluate_mini_batch(SGDExecutionContext& c,
                                    model& model,
                                    data_coordinator& dc,
                                    execution_mode mode);
@@ -137,23 +137,23 @@ class sgd_training_algorithm
   /** Execute callbacks at end of mini-batch. */
   virtual void do_batch_end_cbs(model& model, execution_mode mode);
 
-  sgd_execution_context*
+  SGDExecutionContext*
   do_get_new_execution_context() const override;
 
 private:
-  std::unique_ptr<sgd_termination_criteria> m_stopping_criteria;
+  std::unique_ptr<SGDTerminationCriteria> m_stopping_criteria;
 
   // FIXME (trb 07/20/21): This is a hack. These aren't actually
   // copyable objects (it wouldn't make sense), so when the training
   // algorithm is copied, these are reset to defaults. "In the
   // future", we'll externalize validation and this won't be an issue.
-  sgd_execution_context m_validation_context;
+  SGDExecutionContext m_validation_context;
   size_t m_validation_epochs;
 };
 
 template <>
-std::unique_ptr<sgd_training_algorithm>
-make<sgd_training_algorithm>(google::protobuf::Message const& params);
+std::unique_ptr<SGDTrainingAlgorithm>
+make<SGDTrainingAlgorithm>(google::protobuf::Message const& params);
 
 } // namespace lbann
 
diff --git a/include/lbann/execution_algorithms/training_algorithm.hpp b/include/lbann/execution_algorithms/training_algorithm.hpp
index d13bd27c71e..19b178aeee2 100644
--- a/include/lbann/execution_algorithms/training_algorithm.hpp
+++ b/include/lbann/execution_algorithms/training_algorithm.hpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -24,12 +24,12 @@
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
 
-#ifndef LBANN_TRAINING_ALGORITHM_HPP
-#define LBANN_TRAINING_ALGORITHM_HPP
+#ifndef LBANN_EXECUTION_ALGORITHMS_TRAINING_ALGORITHM_HPP_INCLUDED
+#define LBANN_EXECUTION_ALGORITHMS_TRAINING_ALGORITHM_HPP_INCLUDED
 
 #include "lbann/base.hpp"
 #include "lbann/data_coordinator/data_coordinator.hpp"
-#include "lbann/execution_contexts/execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
 #include "lbann/models/model.hpp"
 #include "lbann/utils/cloneable.hpp"
 #include "lbann/utils/make_abstract.hpp"
@@ -81,8 +81,8 @@ namespace lbann {
  *        learning scenario, in which repeatedly writing to and
  *        reading from disk is not sufficient.
  */
-class training_algorithm
-  : public Cloneable<HasAbstractFunction<training_algorithm>>
+class TrainingAlgorithm
+  : public Cloneable<HasAbstractFunction<TrainingAlgorithm>>
 {
 public:
   /** @name Lifecycle Management */
@@ -90,8 +90,8 @@ class training_algorithm
   /** @brief Constructor
    *  @param[in] name The user-defined name of the algorithm.
    */
-  training_algorithm(std::string name);
-  virtual ~training_algorithm() = default;
+  TrainingAlgorithm(std::string name);
+  virtual ~TrainingAlgorithm() = default;
   ///@}
 
   /** @name Queries */
@@ -115,7 +115,7 @@ class training_algorithm
    *  @param[in] mode IMO, superfluous. Will be removed.
    *  @param[in] term_criteria A description of when to stop training.
    */
-  virtual void apply(execution_context& context,
+  virtual void apply(ExecutionContext& context,
                      model& model,
                      data_coordinator& dc,
                      execution_mode mode) = 0;
@@ -159,7 +159,7 @@ class training_algorithm
    *        the Cloneable interface, for example. See
    *        `do_get_new_execution_context()`.
    */
-  std::unique_ptr<execution_context> get_new_execution_context() const
+  std::unique_ptr<ExecutionContext> get_new_execution_context() const
   {
     return to_unique_ptr(do_get_new_execution_context());
   }
@@ -168,16 +168,16 @@ class training_algorithm
 protected:
   /** @name In-hierarchy Lifecycle Management */
   ///@{
-  training_algorithm(const training_algorithm& other) = default;
-  training_algorithm& operator=(const training_algorithm& other) = default;
-  training_algorithm(training_algorithm&& other) = default;
-  training_algorithm& operator=(training_algorithm&& other) = default;
+  TrainingAlgorithm(const TrainingAlgorithm& other) = default;
+  TrainingAlgorithm& operator=(const TrainingAlgorithm& other) = default;
+  TrainingAlgorithm(TrainingAlgorithm&& other) = default;
+  TrainingAlgorithm& operator=(TrainingAlgorithm&& other) = default;
   ///@}
 
   /** @brief Covariant return-friendly implementation of
    *         `get_new_exection_context()`.
    */
-  virtual execution_context* do_get_new_execution_context() const = 0;
+  virtual ExecutionContext* do_get_new_execution_context() const = 0;
 
 private:
   /** @brief The user-defined name of the algorithm. */
@@ -186,4 +186,4 @@ class training_algorithm
 
 } // namespace lbann
 
-#endif // LBANN_TRAINING_ALGORITHM_HPP
+#endif // LBANN_EXECUTION_ALGORITHMS_TRAINING_ALGORITHM_HPP_INCLUDED
diff --git a/include/lbann/execution_contexts/CMakeLists.txt b/include/lbann/execution_contexts/CMakeLists.txt
deleted file mode 100644
index 79bd7243399..00000000000
--- a/include/lbann/execution_contexts/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Add the headers for this directory
-set_full_path(THIS_DIR_HEADERS
-  execution_context.hpp
-  sgd_execution_context.hpp
-  )
-
-# Propagate the files up the tree
-set(HEADERS "${HEADERS}" "${THIS_DIR_HEADERS}" PARENT_SCOPE)
diff --git a/include/lbann/models/model.hpp b/include/lbann/models/model.hpp
index ea711f736b3..3e0d2fa2076 100644
--- a/include/lbann/models/model.hpp
+++ b/include/lbann/models/model.hpp
@@ -31,7 +31,7 @@
 #include "lbann/comm.hpp"
 #include "lbann/layers/layer.hpp"
 #include "lbann/data_coordinator/data_coordinator_metadata.hpp"
-#include "lbann/execution_contexts/execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
 #include "lbann/utils/summary.hpp"
 #include "lbann/utils/graph.hpp"
 #include "lbann/io/file_io.hpp"
@@ -63,7 +63,7 @@ namespace lbann {
 
 // Forward declarations
 class lbann_callback;
-class training_algorithm;
+class TrainingAlgorithm;
 class callback_base;
 
 /** @brief Abstract base class for neural network models. */
@@ -223,7 +223,7 @@ class model {
   }
 
   /** Grab the training context of the model */
-  const execution_context& get_execution_context() const {
+  const ExecutionContext& get_execution_context() const {
     if(m_execution_context == nullptr) {
       LBANN_ERROR("execution context is not set");
     }
@@ -231,8 +231,8 @@ class model {
   }
 
   /** Grab the training context of the model */
-  execution_context& get_execution_context() {
-    return const_cast<execution_context&>(static_cast<const model&>(*this).get_execution_context());
+  ExecutionContext& get_execution_context() {
+    return const_cast<ExecutionContext&>(static_cast<const model&>(*this).get_execution_context());
   }
 
   // ===========================================
@@ -291,7 +291,7 @@ class model {
   void swap_objective_function(model& other);
 
   // ===========================================
-  // Model modification 
+  // Model modification
   // ===========================================
 
   /** @brief Insert layer in model. */
@@ -395,10 +395,10 @@ class model {
    */
   virtual void setup_layer_topology();
 
-  /** setup sub grids for the sub graph parallelism	
+  /** setup sub grids for the sub graph parallelism
 
-  */	
-  virtual void setup_subgrids();	
+  */
+  virtual void setup_subgrids();
 
   virtual void get_subgrids_order(std::vector<int> &ranks_order, int num_branches);
 
@@ -412,8 +412,8 @@ class model {
 
   virtual void get_subgraph_subgrids_ranks(std::vector<int> &parent_ranks, std::vector<int> &subgrid_ranks, int layer_index,int number_ranks_in_grid);
 
-  virtual void get_resources_for_spliting_point(std::vector<int> &parent_ranks, 
-                  std::vector<int> &subgrid_ranks, 
+  virtual void get_resources_for_spliting_point(std::vector<int> &parent_ranks,
+                  std::vector<int> &subgrid_ranks,
                   int layer_index,
                   int number_ranks_in_grid,
                   int num_subgrids);
@@ -422,7 +422,7 @@ class model {
   virtual void get_resources_for_input_layer(std::vector<int>& masterSubGrid, int num_subgrids);
 
   virtual void setup_subcommunicators();
-  
+
   /** @brief Set up layer execution order.
    *
    *  Called in setup function.
@@ -448,7 +448,7 @@ class model {
   // ===========================================
 
   /** @brief Reset model pointer and execution mode. */
-  virtual void reset_mode(execution_context& context, execution_mode mode);
+  virtual void reset_mode(ExecutionContext& context, execution_mode mode);
   /** @brief Reset model statistics for an epoch. */
   virtual void reset_epoch_statistics(execution_mode mode);
 
@@ -513,21 +513,21 @@ class model {
   size_t get_max_mini_batch_size_distconv() const { return m_max_mini_batch_size_distconv; }
 #endif
 
-	
-private:	
+
+private:
   // map to store all distinct grids in the model
-  std::unordered_map<std::string, std::shared_ptr<El::Grid>> grids; 
+  std::unordered_map<std::string, std::shared_ptr<El::Grid>> grids;
 
-  std::unordered_map<std::string, std::shared_ptr<El::mpi::Comm>> subCommunicatorsSubgrids; 
+  std::unordered_map<std::string, std::shared_ptr<El::mpi::Comm>> subCommunicatorsSubgrids;
   // map to store all distinct mpi groups in the model (one to one mapping with grids)
-  std::unordered_map<std::string, std::unique_ptr<El::mpi::Group>> grids_mpi_groups; 
+  std::unordered_map<std::string, std::unique_ptr<El::mpi::Group>> grids_mpi_groups;
 
 
 
 private:
 
   /** Pointer to the execution context object used for training or evaluating this model */
-  observer_ptr<execution_context> m_execution_context;
+  observer_ptr<ExecutionContext> m_execution_context;
 
   /** @brief LBANN communicator. */
   lbann_comm* m_comm;
@@ -535,20 +535,20 @@ class model {
   /*experimental code for Sub graph*/
   /** Enable vector communication for the subgraph parallelism */
   //0: send-recv based subgrid communication
-  //1: collective based subgrid communication without optimization that requires specific assumptions like subgrids should have same size and creates sub-communicators everytime 
+  //1: collective based subgrid communication without optimization that requires specific assumptions like subgrids should have same size and creates sub-communicators everytime
   //2: collective based subgrid communication with optimization
 
   int vector_communication_subgraph = 0;
 
   //Number of resources for parent (common) grid
-  //0: use all resources (default) 
+  //0: use all resources (default)
   int subgraph_num_resources_parent = 0;
 
   //0: no topology aware design
-  //1: master grid in round robin manner of nodes (GPUs per node 4)  1 3 5 7, 2 4 6 8     
+  //1: master grid in round robin manner of nodes (GPUs per node 4)  1 3 5 7, 2 4 6 8
   bool enable_subgraph_topology = false;
 
-  // whether subgraph parallelism is enabled or not for the model 
+  // whether subgraph parallelism is enabled or not for the model
   bool apply_subgraph_parallelism = false;
 
   // total number of resources / ranks for branch (subgrid) layers
@@ -556,7 +556,7 @@ class model {
 
   // total number of resources / ranks for common/seq layers
   int num_resources_non_branch_layers;
-  
+
   /** @brief Model instance's name.
    *  @details Each model in a trainer should have a unique,
    *  preferably human-readable, name.
diff --git a/include/lbann/trainers/trainer.hpp b/include/lbann/trainers/trainer.hpp
index 8b08efde866..b39f88b056c 100644
--- a/include/lbann/trainers/trainer.hpp
+++ b/include/lbann/trainers/trainer.hpp
@@ -32,7 +32,7 @@
 #include "lbann/data_coordinator/data_coordinator.hpp"
 #include "lbann/detect_El_mpi.hpp"
 #include "lbann/execution_algorithms/training_algorithm.hpp"
-#include "lbann/execution_contexts/execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
 #include "lbann/io/persist.hpp"
 #include "lbann/models/model.hpp"
 #include "lbann/utils/hash.hpp"
@@ -47,8 +47,8 @@ namespace lbann {
 
 // Forward-declare this.
 class lbann_callback;
-class training_algorithm;
-class termination_criteria;
+class TrainingAlgorithm;
+class TerminationCriteria;
 
 /** @brief User-facing class that represents a set of compute resources.
  *
@@ -72,7 +72,7 @@ class trainer
   trainer(lbann_comm* comm,
           std::unique_ptr<data_coordinator> dc,
           size_t mini_batch_size,
-          std::unique_ptr<training_algorithm> alg = nullptr);
+          std::unique_ptr<TrainingAlgorithm> alg = nullptr);
 
   ~trainer();
 
@@ -206,19 +206,19 @@ class trainer
     typename std::pair<observer_ptr<model>, execution_mode>;
 
   execution_context_key_pair_t
-  check_and_build_execution_context(training_algorithm& alg,
+  check_and_build_execution_context(TrainingAlgorithm& alg,
                                     observer_ptr<model> model,
                                     execution_mode mode);
 
   execution_context_key_pair_t
-  check_and_build_execution_context(execution_context& c,
+  check_and_build_execution_context(ExecutionContext& c,
                                     model& model,
                                     execution_mode mode);
 
-  execution_context& get_execution_context(observer_ptr<model> model,
+  ExecutionContext& get_execution_context(observer_ptr<model> model,
                                            execution_mode mode);
 
-  execution_context& get_execution_context(execution_context_key_pair_t key);
+  ExecutionContext& get_execution_context(execution_context_key_pair_t key);
 
   /** @name Training and evaluation interface */
   ///@{
@@ -241,7 +241,7 @@ class trainer
   bool load_from_checkpoint_shared(persist& p);
 
   /** @brief Restore model from a shared checkpoint. */
-  bool load_from_checkpoint_shared(model& m, execution_context& c);
+  bool load_from_checkpoint_shared(model& m, ExecutionContext& c);
 
   /** @brief Create a distributed checkpoint of the trainer. */
   bool save_to_checkpoint_distributed();
@@ -250,7 +250,7 @@ class trainer
   bool load_from_checkpoint_distributed(persist& p);
 
   /** @brief Restore a model from a distributed checkpoint. */
-  bool load_from_checkpoint_distributed(model& m, execution_context& c);
+  bool load_from_checkpoint_distributed(model& m, ExecutionContext& c);
 
   /** @brief Write trainer to proto message */
   void write_proto(lbann_data::Trainer& proto);
@@ -261,7 +261,7 @@ class trainer
   void delete_execution_context(execution_context_key_pair_t key);
 
   void for_each_execution_context(
-    std::function<void(observer_ptr<execution_context>)> fn);
+    std::function<void(observer_ptr<ExecutionContext>)> fn);
 
 private:
   /** @brief Persist object used for serializing LBANN classes. */
@@ -276,7 +276,7 @@ class trainer
 
   using ModelContextMapType =
     std::unordered_map<std::pair<observer_ptr<model>, execution_mode>,
-                       std::unique_ptr<execution_context>,
+                       std::unique_ptr<ExecutionContext>,
                        model_execution_context_hash_t>;
 
   /** @brief Map from model and execution mode to its execution context */
@@ -298,7 +298,7 @@ class trainer
    *  @details If null, a different type of execution algorithm is
    *  being used (e.g., inference).
    */
-  std::unique_ptr<training_algorithm> m_training_alg;
+  std::unique_ptr<TrainingAlgorithm> m_training_alg;
 
   /** @brief Communication domain for the trainer. */
   lbann_comm* m_comm;
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 40964975701..c803e061478 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -10,7 +10,6 @@ add_subdirectory(callbacks)
 add_subdirectory(data_coordinator)
 add_subdirectory(data_readers)
 add_subdirectory(data_store)
-add_subdirectory(execution_contexts)
 add_subdirectory(io)
 add_subdirectory(layers)
 add_subdirectory(metrics)
diff --git a/src/callbacks/check_dataset.cpp b/src/callbacks/check_dataset.cpp
index e855337218c..961f2d13102 100644
--- a/src/callbacks/check_dataset.cpp
+++ b/src/callbacks/check_dataset.cpp
@@ -26,7 +26,7 @@
 
 #include "lbann/comm_impl.hpp"
 #include "lbann/callbacks/check_dataset.hpp"
-#include "lbann/execution_contexts/execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
 #include "lbann/layers/io/input_layer.hpp"
 #include "lbann/utils/serialize.hpp"
 
diff --git a/src/callbacks/check_gradients.cpp b/src/callbacks/check_gradients.cpp
index 1d2f8cda2de..6e19310605a 100644
--- a/src/callbacks/check_gradients.cpp
+++ b/src/callbacks/check_gradients.cpp
@@ -50,7 +50,7 @@ namespace {
  *  layers. It is assumed that input layers have already loaded data.
  */
 EvalType compute_objective_function(model& m) {
-  const auto& c = static_cast<sgd_execution_context&>(m.get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m.get_execution_context());
 
   // Forward prop, skipping input layers
 
@@ -60,7 +60,7 @@ EvalType compute_objective_function(model& m) {
     for (auto&& l : m.get_layers()) {
       if (dynamic_cast<input_layer<DataType>*>(l) == nullptr && l->get_run_layer_in_subgraph()) {
         l->forward_prop();
-          
+
       }
     }
   }
@@ -68,7 +68,7 @@ EvalType compute_objective_function(model& m) {
   {
     for (auto&& l : m.get_layers()) {
       if (dynamic_cast<input_layer<DataType>*>(l) == nullptr) {
-        
+
         l->forward_prop();
       }
     }
@@ -76,7 +76,7 @@ EvalType compute_objective_function(model& m) {
   }
 
 
-  
+
 
   // Get objective function value
   auto&& obj = m.get_objective_function();
@@ -105,7 +105,7 @@ struct DefaultErrorReporter
 struct CheckWeightsFunctor : DefaultErrorReporter
 {
   model &m;
-  sgd_execution_context const& c;
+  SGDExecutionContext const& c;
   EvalType epsilon;
   EvalType step_size;
   EvalType expected_error;
@@ -113,7 +113,7 @@ struct CheckWeightsFunctor : DefaultErrorReporter
   bool error_on_failure;
 
   CheckWeightsFunctor(model& arg_m,
-                      sgd_execution_context const& arg_c,
+                      SGDExecutionContext const& arg_c,
                       EvalType arg_epsilon,
                       EvalType arg_step_size,
                       EvalType arg_expected_error,
@@ -231,7 +231,7 @@ check_gradients::serialize(Archive & ar) {
 void check_gradients::do_check_gradients(model& m) const {
 
   // Get objects from model
-  auto& c = static_cast<sgd_execution_context&>(m.get_execution_context());
+  auto& c = static_cast<SGDExecutionContext&>(m.get_execution_context());
   auto& comm = *m.get_comm();
   const auto mode = c.get_execution_mode();
   const auto& layers = m.get_layers();
@@ -253,7 +253,7 @@ void check_gradients::do_check_gradients(model& m) const {
   data_coordinator& dc = get_trainer().get_data_coordinator();
   dc.fetch_data(mode);
 
-  //checking subgrpah parallelism 
+  //checking subgrpah parallelism
   if(m.is_subgraph_parallelism_enabled())
   {
     for (auto&& l : m.get_layers()) {
@@ -274,7 +274,7 @@ void check_gradients::do_check_gradients(model& m) const {
 
   }
 
-  
+
 
   // Compute objective function
   const EvalType objective = compute_objective_function(m);
@@ -303,7 +303,7 @@ void check_gradients::do_check_gradients(model& m) const {
   m.get_objective_function()->differentiate();
   m.get_objective_function()->compute_weight_regularization();
 
-  //checking subgraph parallelism 
+  //checking subgraph parallelism
   if(m.is_subgraph_parallelism_enabled())
   {
     for (El::Int i = layers.size()-1; i >= 0; --i) {
@@ -311,7 +311,7 @@ void check_gradients::do_check_gradients(model& m) const {
       {
         layers[i]->back_prop();
       }
-      
+
     }
 
   }
@@ -322,7 +322,7 @@ void check_gradients::do_check_gradients(model& m) const {
     }
 
   }
-  
+
 
   // Print objective function value
   if (comm.am_world_master()) {
diff --git a/src/callbacks/check_init.cpp b/src/callbacks/check_init.cpp
index d0d5148e297..688fbe02b58 100644
--- a/src/callbacks/check_init.cpp
+++ b/src/callbacks/check_init.cpp
@@ -62,7 +62,7 @@ void check_init::serialize(Archive & ar) {
 }
 
 void check_init::on_train_begin(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   // Skip after the first epoch.
   if (c.get_epoch() != 0) {
     return;
diff --git a/src/callbacks/check_nan.cpp b/src/callbacks/check_nan.cpp
index 347b8cbe6b4..91bafca752c 100644
--- a/src/callbacks/check_nan.cpp
+++ b/src/callbacks/check_nan.cpp
@@ -100,9 +100,9 @@ struct DefaultErrorReporter
 struct DumpLayerFunctor : DefaultErrorReporter
 {
   model * m;
-  sgd_execution_context const& c;
+  SGDExecutionContext const& c;
 
-  DumpLayerFunctor(model* arg_m, sgd_execution_context const& arg_c)
+  DumpLayerFunctor(model* arg_m, SGDExecutionContext const& arg_c)
     : m(arg_m), c(arg_c)
   {}
 
@@ -130,9 +130,9 @@ struct DumpLayerFunctor : DefaultErrorReporter
 struct DumpWeightsFunctor : DefaultErrorReporter
 {
   model * m;
-  sgd_execution_context const& c;
+  SGDExecutionContext const& c;
 
-  DumpWeightsFunctor(model* arg_m, sgd_execution_context const& arg_c)
+  DumpWeightsFunctor(model* arg_m, SGDExecutionContext const& arg_c)
     : m(arg_m), c(arg_c)
   {}
 
@@ -165,7 +165,7 @@ template <typename T> using SingleTypeDataTypeLayer = data_type_layer<T, T>;
 void dump_network(model *m) {
   using ValidFPTypes = supported_layer_data_type;
 
-  const auto& c = dynamic_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = dynamic_cast<SGDExecutionContext&>(m->get_execution_context());
   for (auto* l : m->get_layers()) {
     using LayerTypes = h2::meta::tlist::ExpandTL<SingleTypeDataTypeLayer,
                                                  ValidFPTypes>;
diff --git a/src/callbacks/checkpoint.cpp b/src/callbacks/checkpoint.cpp
index ce038f19e1e..971331a99c1 100644
--- a/src/callbacks/checkpoint.cpp
+++ b/src/callbacks/checkpoint.cpp
@@ -99,7 +99,7 @@ void checkpoint::on_batch_begin(model *m) {
 
 // Decide if we need to trigger a checkpoint for either mode, based on prototext defined intervals
 bool checkpoint::need_checkpoint(model *m, callback_phase phase) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   /* TODO: since we're using clocks, this requires a bcast for each call,
    * we could use number of samples processed to make a local decision */
   // if none of our checkpoint conditions are set, assume we're not checkpointing
@@ -154,7 +154,7 @@ bool checkpoint::need_checkpoint(model *m, callback_phase phase) {
 // Checkpoint Shared/Distributed
 bool checkpoint::do_checkpoint(model *m, visitor_hook hook) {
   auto& p = get_active_trainer().get_persist_obj();
-  auto& c = dynamic_cast<sgd_execution_context&>(m->get_execution_context());
+  auto& c = dynamic_cast<SGDExecutionContext&>(m->get_execution_context());
   auto& t = get_active_trainer();
   if(&t != &get_trainer()) { LBANN_ERROR("Mismatched trainers"); }
   // if the checkpoint directory is not defined, bail
@@ -442,7 +442,7 @@ bool checkpoint::restart(model *m) {
   // contexts exists and create a valid execution context for each
   // one.
   // Then setup the model with the proper one
-  auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   return open_latest_checkpoint(
     *(m->get_comm()),
     "Restarting",
diff --git a/src/callbacks/confusion_matrix.cpp b/src/callbacks/confusion_matrix.cpp
index 12f6db5cc1a..d56ad09a13a 100644
--- a/src/callbacks/confusion_matrix.cpp
+++ b/src/callbacks/confusion_matrix.cpp
@@ -195,7 +195,7 @@ void confusion_matrix::update_counts(const model& m) {
 }
 
 void confusion_matrix::save_confusion_matrix(const model& m) {
-  const auto& c = static_cast<const sgd_execution_context&>(m.get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m.get_execution_context());
 
   // Get counts
   const auto& mode = c.get_execution_mode();
diff --git a/src/callbacks/debug.cpp b/src/callbacks/debug.cpp
index 6769141ba1e..8d2c6509311 100644
--- a/src/callbacks/debug.cpp
+++ b/src/callbacks/debug.cpp
@@ -69,7 +69,7 @@ std::string weights_string(const data_type_weights<TensorDataType>& w) {
 /** Get human-readable string describing current batch step. */
 std::string batch_step_string(const model& m) {
   const auto& c =
-    dynamic_cast<const sgd_execution_context&>(m.get_execution_context());
+    dynamic_cast<const SGDExecutionContext&>(m.get_execution_context());
   std::stringstream msg;
   const auto& mode = c.get_execution_mode();
   msg << to_string(mode) << " batch " << c.get_step();
diff --git a/src/callbacks/debug_io.cpp b/src/callbacks/debug_io.cpp
index d3891955941..b1b92ddc79b 100644
--- a/src/callbacks/debug_io.cpp
+++ b/src/callbacks/debug_io.cpp
@@ -75,7 +75,7 @@ void debug_io::on_forward_prop_begin(model *m, Layer *l) {
 }
 
 void debug_io::print_fp_start(model *m, input_layer<DataType> *input) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   const data_coordinator& dc = get_const_trainer().get_data_coordinator();
   const auto& step = c.get_step();
   const auto mode = c.get_execution_mode();
diff --git a/src/callbacks/dump_error_signals.cpp b/src/callbacks/dump_error_signals.cpp
index 993214f0cf1..55aebacf6af 100644
--- a/src/callbacks/dump_error_signals.cpp
+++ b/src/callbacks/dump_error_signals.cpp
@@ -42,7 +42,7 @@ void dump_error_signals::serialize(Archive & ar) {
 }
 
 void dump_error_signals::on_backward_prop_end(model *m, Layer *l) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
 
   // Write each activation matrix to file
   for (int i = 0; i < l->get_num_parents(); ++i) {
diff --git a/src/callbacks/dump_gradients.cpp b/src/callbacks/dump_gradients.cpp
index 572f3d33326..75de52641ce 100644
--- a/src/callbacks/dump_gradients.cpp
+++ b/src/callbacks/dump_gradients.cpp
@@ -50,7 +50,7 @@ void dump_gradients::serialize(Archive & ar) {
 }
 
 void dump_gradients::on_backward_prop_end(model *m) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   for (weights *w : m->get_weights()) {
     optimizer *opt = w->get_optimizer();
     if (opt != nullptr) {
diff --git a/src/callbacks/dump_minibatch_sample_indices.cpp b/src/callbacks/dump_minibatch_sample_indices.cpp
index b46fb26dc63..80534d149e7 100644
--- a/src/callbacks/dump_minibatch_sample_indices.cpp
+++ b/src/callbacks/dump_minibatch_sample_indices.cpp
@@ -53,7 +53,7 @@ void dump_minibatch_sample_indices::serialize(Archive & ar) {
 }
 
 void dump_minibatch_sample_indices::dump_to_file(model *m, Layer *l, int64_t step) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   // Print minibatch sample indices of the data coordinator
   data_coordinator& dc = get_trainer().get_data_coordinator();
   El::Matrix<El::Int>* indices = dc.get_sample_indices_per_mb(c.get_execution_mode());
diff --git a/src/callbacks/dump_outputs.cpp b/src/callbacks/dump_outputs.cpp
index dcca97f6ac9..e8b89af78ee 100644
--- a/src/callbacks/dump_outputs.cpp
+++ b/src/callbacks/dump_outputs.cpp
@@ -153,7 +153,7 @@ void dump_outputs::serialize(Archive & ar) {
 }
 
 void dump_outputs::do_dump_outputs(const model& m, const Layer& l) {
-  const auto& c = static_cast<const sgd_execution_context&>(m.get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m.get_execution_context());
 
   // Get mini-batch step information
   const auto& mode = c.get_execution_mode();
diff --git a/src/callbacks/dump_weights.cpp b/src/callbacks/dump_weights.cpp
index 2f229a4ae80..e483fcff6f7 100644
--- a/src/callbacks/dump_weights.cpp
+++ b/src/callbacks/dump_weights.cpp
@@ -257,14 +257,14 @@ void dump_weights::on_train_begin(model *m) {
 }
 
 void dump_weights::on_epoch_end(model *m) {
-  const auto& context = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& context = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   if (context.get_epoch() % m_epoch_interval == 0) {
     do_dump_weights(*m, visitor_hook::epoch_end);
   }
 }
 
 void dump_weights::do_dump_weights(const model& m, visitor_hook hook) {
-  const auto& context = static_cast<const sgd_execution_context&>(m.get_execution_context());
+  const auto& context = static_cast<const SGDExecutionContext&>(m.get_execution_context());
   const auto& t = get_const_trainer();
 
   // Create directory for weight files
diff --git a/src/callbacks/early_stopping.cpp b/src/callbacks/early_stopping.cpp
index a58335cfb95..327b85e48f9 100644
--- a/src/callbacks/early_stopping.cpp
+++ b/src/callbacks/early_stopping.cpp
@@ -56,7 +56,7 @@ void early_stopping::serialize(Archive & ar) {
 /// Monitor the objective function to see if the validation score
 /// continues to improve
 void early_stopping::on_validation_end(model *m) {
-  auto& c = dynamic_cast<sgd_execution_context&>(m->get_execution_context());
+  auto& c = dynamic_cast<SGDExecutionContext&>(m->get_execution_context());
   execution_mode mode = c.get_execution_mode();
   EvalType score = m->get_objective_function()->get_mean_value(mode);
   if (score < m_last_score) {
diff --git a/src/callbacks/learning_rate.cpp b/src/callbacks/learning_rate.cpp
index 9a795d49ef8..5a9e5bc8443 100644
--- a/src/callbacks/learning_rate.cpp
+++ b/src/callbacks/learning_rate.cpp
@@ -79,7 +79,7 @@ void learning_rate::setup(model *m) {
 }
 
 void learning_rate::on_epoch_end(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   const float new_lr = global_schedule(m);
   const float old_global_lr = m_cur_global_lr;
   m_cur_global_lr = new_lr;
@@ -123,7 +123,7 @@ step_learning_rate::step_learning_rate(
   m_step(step), m_amt(amt) {}
 
 float step_learning_rate::global_schedule(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   if (c.get_epoch() % m_step == 0) {
     return step_learning_rate::get_current_global_learning_rate() * m_amt;
   } else {
@@ -142,7 +142,7 @@ adaptive_learning_rate::adaptive_learning_rate(
   m_patience(patience), m_amt(amt) {}
 
 float adaptive_learning_rate::global_schedule(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   // Determine behavior the first time this is called in an epoch
   if (m_cur_epoch != c.get_epoch()) {
     m_cur_epoch = c.get_epoch();
@@ -187,7 +187,7 @@ drop_fixed_learning_rate::drop_fixed_learning_rate(
 }
 
 float drop_fixed_learning_rate::global_schedule(model* m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   // Delete last drop epoch if we have already passed it
   while (!m_drop_epochs.empty()
          && c.get_epoch() > m_drop_epochs.back()) {
@@ -230,7 +230,7 @@ void linear_growth_learning_rate::setup(model *m) {
 }
 
 float linear_growth_learning_rate::global_schedule(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   if (c.get_epoch() < m_delay) {
     return linear_growth_learning_rate::get_current_global_learning_rate();
   } else if (c.get_epoch() <= m_num_epochs + m_delay) {
@@ -286,7 +286,7 @@ float poly_learning_rate::global_schedule(model *m) {
  * Compute the learning rate for the next iteration.
  */
 float poly_learning_rate::optimizer_schedule(model *m, optimizer &opt) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   const size_t cur_iter = c.get_step();
   if (m_max_iter > cur_iter) {
     m_lr = static_cast<float>(std::pow(static_cast<double>(m_max_iter - cur_iter)/m_max_iter, m_p));
diff --git a/src/callbacks/mixup.cpp b/src/callbacks/mixup.cpp
index 2d0a68a358c..55d28962538 100644
--- a/src/callbacks/mixup.cpp
+++ b/src/callbacks/mixup.cpp
@@ -57,7 +57,7 @@ void mixup::on_forward_prop_end(model *m, Layer *l) {
     return;
   }
   const auto& c =
-    dynamic_cast<const sgd_execution_context&>(m->get_execution_context());
+    dynamic_cast<const SGDExecutionContext&>(m->get_execution_context());
   if (c.get_execution_mode() != execution_mode::training) {
     return;  // No mixup outside of training.
   }
diff --git a/src/callbacks/monitor_io.cpp b/src/callbacks/monitor_io.cpp
index b96f74de55b..bf8c06be406 100644
--- a/src/callbacks/monitor_io.cpp
+++ b/src/callbacks/monitor_io.cpp
@@ -47,7 +47,7 @@ void monitor_io::serialize(Archive & ar) {
 }
 
 void monitor_io::on_epoch_end(model *m) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   const data_coordinator& dc = get_const_trainer().get_data_coordinator();
   lbann_comm *comm = m->get_comm();
   std::cout << "Rank " << comm->get_trainer_rank() << "."
@@ -58,7 +58,7 @@ void monitor_io::on_epoch_end(model *m) {
 }
 
 void monitor_io::on_test_end(model *m) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   const data_coordinator& dc = get_const_trainer().get_data_coordinator();
   lbann_comm *comm = m->get_comm();
   std::cout << "Rank " << comm->get_trainer_rank() << "."
diff --git a/src/callbacks/print_statistics.cpp b/src/callbacks/print_statistics.cpp
index 76e0d9ee2e1..aae24784d94 100644
--- a/src/callbacks/print_statistics.cpp
+++ b/src/callbacks/print_statistics.cpp
@@ -65,7 +65,7 @@ void print_statistics::setup(model *m) {
 }
 
 void print_statistics::on_epoch_begin(model *m) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   data_coordinator& dc = get_trainer().get_data_coordinator();
   lbann_comm *comm = m->get_comm();
   if (comm->am_world_master()) {
@@ -154,7 +154,7 @@ void print_statistics::on_test_end(model *m) {
 }
 
 void print_statistics::report_results(model *m) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   lbann_comm *comm = m->get_comm();
 
   // Get string for execution mode
diff --git a/src/callbacks/profiler.cpp b/src/callbacks/profiler.cpp
index f08e10629d6..3dd6abc5627 100644
--- a/src/callbacks/profiler.cpp
+++ b/src/callbacks/profiler.cpp
@@ -65,7 +65,7 @@ void profiler::serialize(Archive & ar) {
 }
 
 void profiler::on_epoch_begin(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   // Skip the first epoch
   if (m_skip_init && c.get_epoch() == 1) {
     prof_start();
@@ -75,31 +75,31 @@ void profiler::on_epoch_begin(model *m) {
 }
 
 void profiler::on_epoch_end(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   prof_region_end(("epoch " + std::to_string(c.get_epoch())).c_str(),
                   m_sync);
 }
 
 void profiler::on_validation_begin(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   prof_region_begin(("val " + std::to_string(c.get_epoch())).c_str(),
                     prof_colors[0], m_sync);
 }
 
 void profiler::on_validation_end(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   prof_region_end(("val " + std::to_string(c.get_epoch())).c_str(),
                   m_sync);
 }
 
 void profiler::on_test_begin(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   prof_region_begin(("test " + std::to_string(c.get_epoch())).c_str(),
                     prof_colors[0], m_sync);
 }
 
 void profiler::on_test_end(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   prof_region_end(("test " + std::to_string(c.get_epoch())).c_str(),
                   m_sync);
 }
diff --git a/src/callbacks/save_images.cpp b/src/callbacks/save_images.cpp
index c16fa2bd8cc..74d8e6bad26 100644
--- a/src/callbacks/save_images.cpp
+++ b/src/callbacks/save_images.cpp
@@ -157,7 +157,7 @@ void save_images::serialize(Archive & ar) {
 }
 
 void save_images::on_epoch_end(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   save_image(build_string(m_image_prefix, "epoch", c.get_epoch()),
              m_image_format,
              m->get_layers(),
diff --git a/src/callbacks/save_model.cpp b/src/callbacks/save_model.cpp
index 91f77854152..6d6b9064827 100644
--- a/src/callbacks/save_model.cpp
+++ b/src/callbacks/save_model.cpp
@@ -93,7 +93,7 @@ bool save_model::do_save_model(model *m) {
 
 // Save model weights
 bool save_model::do_save_model_weights(model *m) {
-  const auto& c = static_cast<sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m->get_execution_context());
   // if the checkpoint directory is not defined, bail
   if (m_dir.length() == 0) {
     return false;
diff --git a/src/callbacks/save_topk_models.cpp b/src/callbacks/save_topk_models.cpp
index c49b03b595c..aaab223b4d2 100644
--- a/src/callbacks/save_topk_models.cpp
+++ b/src/callbacks/save_topk_models.cpp
@@ -49,7 +49,7 @@ void save_topk_models::on_test_end(model *m) {
 }
 
 bool save_topk_models::am_in_topk(model *m) {
-  const auto& c = static_cast<const execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const ExecutionContext&>(m->get_execution_context());
   lbann_comm *comm = m->get_comm();
   const int num_trainers = comm->get_num_trainers();
   std::string mode_string = "test";
diff --git a/src/callbacks/summarize_images.cpp b/src/callbacks/summarize_images.cpp
index 0b5f47fdd52..09bbb2be2b3 100644
--- a/src/callbacks/summarize_images.cpp
+++ b/src/callbacks/summarize_images.cpp
@@ -90,7 +90,7 @@ std::vector<std::pair<size_t, El::Int>>
 categorical_accuracy_strategy::get_image_indices(model const& m) const {
   static size_t img_counter = 0;
   static size_t epoch_counter = 0;
-  auto const& exe_ctx = dynamic_cast<sgd_execution_context const&>(m.get_execution_context());
+  auto const& exe_ctx = dynamic_cast<SGDExecutionContext const&>(m.get_execution_context());
   if(exe_ctx.get_epoch() > epoch_counter){
     epoch_counter++;
     img_counter = 0;
@@ -279,7 +279,7 @@ summarize_images::summarize_images(std::shared_ptr<lbann_summary> const& summari
 
 void summarize_images::on_batch_evaluate_end(model* m) {
 
-  auto const& exe_ctx = dynamic_cast<sgd_execution_context const&>(m->get_execution_context());
+  auto const& exe_ctx = dynamic_cast<SGDExecutionContext const&>(m->get_execution_context());
   if (exe_ctx.get_epoch() % m_epoch_interval != 0)
     return;
 
@@ -311,7 +311,7 @@ void summarize_images::dump_images_to_summary(model const& m) const {
           "Column index ", col_index, " is greater than Matrix width ",
           local_images.Width());
       }
-      auto const& exe_ctx = dynamic_cast<sgd_execution_context const&>(
+      auto const& exe_ctx = dynamic_cast<SGDExecutionContext const&>(
         m.get_execution_context());
       auto image_tag =  m_strategy->get_tag(m_img_source_layer_name,
                                             sample_index, exe_ctx.get_epoch());
diff --git a/src/callbacks/timer.cpp b/src/callbacks/timer.cpp
index cc156bd09e9..264ec43c478 100644
--- a/src/callbacks/timer.cpp
+++ b/src/callbacks/timer.cpp
@@ -72,7 +72,7 @@ void timer::timing_begin(const model& m) {
 }
 
 void timer::timing_end(model& m) {
-  const auto& c = static_cast<sgd_execution_context&>(m.get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m.get_execution_context());
   constexpr EvalType zero = 0;
 
   // Get run time
diff --git a/src/callbacks/variable_minibatch.cpp b/src/callbacks/variable_minibatch.cpp
index dcf8f628d4d..66a8d542c60 100644
--- a/src/callbacks/variable_minibatch.cpp
+++ b/src/callbacks/variable_minibatch.cpp
@@ -45,7 +45,7 @@ variable_minibatch::variable_minibatch(
 
 void variable_minibatch::on_train_begin(model *m) {
   // Avoid issues with the train method being called multiple times.
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   if (c.get_epoch() != 0) { return; }
   const auto& t = get_const_trainer();
 
@@ -71,7 +71,7 @@ void variable_minibatch::on_train_begin(model *m) {
 }
 
 void variable_minibatch::on_epoch_end(model *m) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   const auto& t = get_const_trainer();
 
   // Get first input layer in model
@@ -161,7 +161,7 @@ step_minibatch::step_minibatch(
 
 bool step_minibatch::schedule(
   model *m, size_t& new_mbsize, float& new_lr, size_t& ramp_time) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   if (c.get_epoch() % m_step == 0) {
     new_mbsize = m_current_mini_batch_size * 2;
     new_lr = get_current_learning_rate(m) * 2;
@@ -183,7 +183,7 @@ minibatch_schedule::minibatch_schedule(
 
 bool minibatch_schedule::schedule(
   model *m, size_t& new_mbsize, float& new_lr, size_t& ramp_time) {
-  const auto& c = static_cast<const sgd_execution_context&>(m->get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
   if (!m_steps.empty() && c.get_epoch() == m_steps.back().epoch) {
     new_mbsize = m_steps.back().mbsize;
     new_lr = m_steps.back().lr;
diff --git a/src/data_readers/data_reader.cpp b/src/data_readers/data_reader.cpp
index fb9b3f2fdba..3fc80f2e90d 100644
--- a/src/data_readers/data_reader.cpp
+++ b/src/data_readers/data_reader.cpp
@@ -29,7 +29,7 @@
 #include "lbann/comm_impl.hpp"
 #include "lbann/data_readers/data_reader.hpp"
 #include "lbann/data_store/data_store_conduit.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/io/persist.hpp"
 #include "lbann/io/persist_impl.hpp"
 #include "lbann/trainers/trainer.hpp"
@@ -704,7 +704,7 @@ bool generic_data_reader::data_store_active() const {
     return true;
   }
 
-  const auto& c = static_cast<const sgd_execution_context&>(get_trainer().get_data_coordinator().get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(get_trainer().get_data_coordinator().get_execution_context());
   /// Use the data store for all modes except testing
   /// i.e. training, validation, tournament
   return (m_data_store != nullptr
@@ -715,7 +715,7 @@ bool generic_data_reader::data_store_active() const {
 }
 
 bool generic_data_reader::priming_data_store() const {
-  const auto& c = static_cast<const sgd_execution_context&>(get_trainer().get_data_coordinator().get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(get_trainer().get_data_coordinator().get_execution_context());
   if (m_data_store != nullptr && m_data_store->is_fully_loaded()) {
     return false;
   }
diff --git a/src/data_readers/data_reader_jag_conduit.cpp b/src/data_readers/data_reader_jag_conduit.cpp
index e3a75a52d7f..c4ec9d625fb 100644
--- a/src/data_readers/data_reader_jag_conduit.cpp
+++ b/src/data_readers/data_reader_jag_conduit.cpp
@@ -29,7 +29,7 @@
 #include "lbann/data_readers/data_reader_jag_conduit.hpp"
 #include "lbann/data_store/data_store_conduit.hpp"
 #include "lbann/trainers/trainer.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/utils/lbann_library.hpp"
 #include "lbann/utils/serialize.hpp"
 #include "lbann/utils/vision.hpp"
@@ -1452,7 +1452,7 @@ bool data_reader_jag_conduit::fetch_datum(CPUMat& X, int data_id, int mb_idx) {
 }
 
 bool data_reader_jag_conduit::fetch_response(CPUMat& X, int data_id, int mb_idx) {
-  const auto& c = static_cast<const sgd_execution_context&>(get_trainer().get_data_coordinator().get_execution_context());
+  const auto& c = static_cast<const SGDExecutionContext&>(get_trainer().get_data_coordinator().get_execution_context());
   int tid = m_io_thread_pool->get_local_thread_id();
   std::vector<size_t> sizes = get_linearized_response_sizes();
   std::vector<CPUMat> X_v = create_datum_views(X, sizes, mb_idx);
diff --git a/src/data_readers/data_reader_numpy_npz_conduit.cpp b/src/data_readers/data_reader_numpy_npz_conduit.cpp
index 12c2cfadad3..d576846587c 100644
--- a/src/data_readers/data_reader_numpy_npz_conduit.cpp
+++ b/src/data_readers/data_reader_numpy_npz_conduit.cpp
@@ -263,7 +263,7 @@ bool numpy_npz_conduit_reader::fetch_datum(Mat& X, int data_id, int mb_idx) {
     load_npz(m_filenames[data_id], data_id, node);
     //note: if testing, and test set is touched more than once, the following
     //      will through an exception TODO: relook later
-    const auto& c = static_cast<const execution_context&>(get_trainer().get_data_coordinator().get_execution_context());
+    const auto& c = static_cast<const ExecutionContext&>(get_trainer().get_data_coordinator().get_execution_context());
     if (priming_data_store() || c.get_execution_mode() == execution_mode::testing) {
       m_data_store->set_conduit_node(data_id, node);
     }
diff --git a/src/execution_algorithms/CMakeLists.txt b/src/execution_algorithms/CMakeLists.txt
index 2ec335d9599..fc6da9db2d4 100644
--- a/src/execution_algorithms/CMakeLists.txt
+++ b/src/execution_algorithms/CMakeLists.txt
@@ -1,8 +1,10 @@
 # Add the source files for this directory
 set_full_path(THIS_DIR_SOURCES
+  execution_context.cpp
   factory.cpp
   kfac.cpp
   ltfb.cpp
+  sgd_execution_context.cpp
   sgd_training_algorithm.cpp
   training_algorithm.cpp
   )
diff --git a/src/execution_contexts/execution_context.cpp b/src/execution_algorithms/execution_context.cpp
similarity index 83%
rename from src/execution_contexts/execution_context.cpp
rename to src/execution_algorithms/execution_context.cpp
index 7f54daeed7e..4bedb8576e5 100644
--- a/src/execution_contexts/execution_context.cpp
+++ b/src/execution_algorithms/execution_context.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -24,7 +24,7 @@
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
 
-#include "lbann/execution_contexts/execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
 #include "lbann/utils/serialize.hpp"
 
 namespace lbann {
@@ -33,14 +33,14 @@ namespace lbann {
 // Execution context
 //******************************************************************************
 
-execution_context::execution_context() : m_step{0UL} {}
+ExecutionContext::ExecutionContext() : m_step{0UL} {}
 
-template <class Archive> void execution_context::serialize(Archive& ar)
+template <class Archive> void ExecutionContext::serialize(Archive& ar)
 {
   ar(CEREAL_NVP(m_step));
 }
 
 } // namespace lbann
 
-#define LBANN_CLASS_NAME execution_context
+#define LBANN_CLASS_NAME ExecutionContext
 #include <lbann/macros/register_class_with_cereal.hpp>
diff --git a/src/execution_algorithms/factory.cpp b/src/execution_algorithms/factory.cpp
index 58136e216cf..1ce755e6ccf 100644
--- a/src/execution_algorithms/factory.cpp
+++ b/src/execution_algorithms/factory.cpp
@@ -39,7 +39,7 @@ namespace {
 lbann::TrainingAlgorithmFactory build_default_factory()
 {
   lbann::TrainingAlgorithmFactory fact;
-  fact.register_builder("SGD", lbann::make<lbann::sgd_training_algorithm>);
+  fact.register_builder("SGD", lbann::make<lbann::SGDTrainingAlgorithm>);
   fact.register_builder("LTFB", lbann::make<lbann::LTFB>);
   fact.register_builder("KFAC", lbann::make<lbann::KFAC>);
   return fact;
@@ -60,8 +60,8 @@ void lbann::register_new_training_algorithm(TrainingAlgorithmKey key,
 }
 
 template <>
-std::unique_ptr<lbann::training_algorithm>
-lbann::make_abstract<lbann::training_algorithm>(
+std::unique_ptr<lbann::TrainingAlgorithm>
+lbann::make_abstract<lbann::TrainingAlgorithm>(
   google::protobuf::Message const& params)
 {
   auto const& algo_params =
diff --git a/src/execution_algorithms/kfac.cpp b/src/execution_algorithms/kfac.cpp
index 9dd6f31d4c4..2115850d7ac 100644
--- a/src/execution_algorithms/kfac.cpp
+++ b/src/execution_algorithms/kfac.cpp
@@ -135,9 +135,9 @@ KFAC& KFAC::operator=(KFAC const& other) {
 
 std::string KFAC::get_type() const { return "KFAC"; }
 
-kfac::ExecutionContext* KFAC::do_get_new_execution_context() const
+kfac::KFACExecutionContext* KFAC::do_get_new_execution_context() const
 {
-  return new kfac::ExecutionContext(
+  return new kfac::KFACExecutionContext(
     0UL,
     m_damping_act_params[0],
     m_damping_err_params[0],
@@ -150,7 +150,7 @@ kfac::ExecutionContext* KFAC::do_get_new_execution_context() const
 // =============================================
 
 void KFAC::apply(
-  execution_context& context_,
+  ExecutionContext& context_,
   model& model,
   data_coordinator& dc,
   execution_mode mode)
@@ -160,7 +160,7 @@ void KFAC::apply(
     train(context, model, dc, *m_stopping_criteria);
   }
   else {
-    sgd_training_algorithm eval_algo(
+    SGDTrainingAlgorithm eval_algo(
       this->get_name()+"_eval",
       m_stopping_criteria->clone());
     auto& eval_context = context.get_sgd_execution_context();
@@ -230,7 +230,7 @@ void KFAC::train(
         // its own context that we needn't know about.
         if (dc.is_execution_mode_valid(execution_mode::validation)) {
           const execution_mode eval_mode = execution_mode::validation;
-          sgd_execution_context eval_context(
+          SGDExecutionContext eval_context(
             eval_mode,
             dc.get_mini_batch_size(eval_mode));
           // FIXME (trb 05/05/2021): This hacks around a bad assumption
@@ -241,9 +241,9 @@ void KFAC::train(
             eval_context.inc_epoch();
             ++num_validation_epochs;
           }
-          sgd_training_algorithm eval_algo(
+          SGDTrainingAlgorithm eval_algo(
             this->get_name()+"_eval",
-            make_unique<epoch_termination_criteria>(num_validation_epochs));
+            make_unique<EpochTerminationCriteria>(num_validation_epochs));
           eval_algo.apply(eval_context, model, dc, eval_mode);
 
           // FIXME (trb 06/07/21): The early stopping callback is part
@@ -417,8 +417,8 @@ void KFAC::do_epoch_end_cbs(model& model)
 
 void KFAC::do_batch_begin_cbs(model& model)
 {
-  sgd_execution_context& c =
-    static_cast<sgd_execution_context&>(model.get_execution_context());
+  SGDExecutionContext& c =
+    static_cast<SGDExecutionContext&>(model.get_execution_context());
   for (const auto& cb : model.get_callbacks()) {
     if (c.get_step() % cb->get_batch_interval() == 0) {
       cb->on_batch_begin(&model);
@@ -428,8 +428,8 @@ void KFAC::do_batch_begin_cbs(model& model)
 
 void KFAC::do_batch_end_cbs(model& model)
 {
-  sgd_execution_context& c =
-    static_cast<sgd_execution_context&>(model.get_execution_context());
+  SGDExecutionContext& c =
+    static_cast<SGDExecutionContext&>(model.get_execution_context());
   for (const auto& cb : model.get_callbacks()) {
     if (c.get_step() % cb->get_batch_interval() == 0) {
       cb->on_batch_end(&model);
@@ -1109,18 +1109,18 @@ std::unique_ptr<lbann::KFAC> lbann::make<lbann::KFAC>(
   // SGD parameters
   auto const& sgd_params = kfac_params.sgd();
   auto const& stopping_criteria = sgd_params.stopping_criteria();
-  std::unique_ptr<lbann::sgd_termination_criteria> stopping;
+  std::unique_ptr<SGDTerminationCriteria> stopping;
   switch (stopping_criteria.criterion_case()) {
   case lbann_data::SGD::TerminationCriteria::kMaxBatches:
-    stopping = lbann::make_unique<lbann::batch_termination_criteria>(
+    stopping = make_unique<BatchTerminationCriteria>(
       stopping_criteria.max_batches());
     break;
   case lbann_data::SGD::TerminationCriteria::kMaxEpochs:
-    stopping = lbann::make_unique<lbann::epoch_termination_criteria>(
+    stopping = make_unique<EpochTerminationCriteria>(
       stopping_criteria.max_epochs());
     break;
   case lbann_data::SGD::TerminationCriteria::kMaxSeconds:
-    stopping = lbann::make_unique<lbann::seconds_termination_criteria>(
+    stopping = make_unique<SecondsTerminationCriteria>(
       stopping_criteria.max_seconds());
     //LBANN_ERROR("Time-based training not yet supported in SGD.");
     break;
diff --git a/src/execution_algorithms/kfac/execution_context.cpp b/src/execution_algorithms/kfac/execution_context.cpp
index a2d7d7a4bd2..147f9438f1b 100644
--- a/src/execution_algorithms/kfac/execution_context.cpp
+++ b/src/execution_algorithms/kfac/execution_context.cpp
@@ -33,7 +33,7 @@ namespace kfac {
 // Life cycle
 // =============================================
 
-ExecutionContext::ExecutionContext(
+KFACExecutionContext::KFACExecutionContext(
   size_t mini_batch_size,
   double damping_act,
   double damping_err,
@@ -46,26 +46,26 @@ ExecutionContext::ExecutionContext(
     m_damping_bn_err{damping_bn_err}
 {}
 
-std::unique_ptr<lbann::execution_context> ExecutionContext::get_new() const
+std::unique_ptr<lbann::ExecutionContext> KFACExecutionContext::get_new() const
 {
-    return std::make_unique<ExecutionContext>(0UL, 0.0, 0.0, 0.0, 0.0);
+    return std::make_unique<KFACExecutionContext>(0UL, 0.0, 0.0, 0.0, 0.0);
 }
 
 // =============================================
 // Accessors
 // =============================================
 
-std::string ExecutionContext::get_type() const
+std::string KFACExecutionContext::get_type() const
 {
   return "KFAC";
 }
 
-std::string ExecutionContext::get_state_string() const noexcept
+std::string KFACExecutionContext::get_state_string() const noexcept
 {
   return build_string(this->get_type(), ".step.", m_sgd_execution_context.get_step());
 }
 
-El::Matrix<DataType,Device>& ExecutionContext::get_workspace_matrix(
+El::Matrix<DataType,Device>& KFACExecutionContext::get_workspace_matrix(
   const std::string& key,
   const size_t height,
   const size_t width) {
@@ -93,19 +93,19 @@ El::Matrix<DataType,Device>& ExecutionContext::get_workspace_matrix(
 // Checkpointing and serialization
 // =============================================
 
-void ExecutionContext::save_to_checkpoint_shared(persist& p)
+void KFACExecutionContext::save_to_checkpoint_shared(persist& p)
 {
   LBANN_ERROR("TODO: Not yet implemented.");
 }
-void ExecutionContext::load_from_checkpoint_shared(persist& p)
+void KFACExecutionContext::load_from_checkpoint_shared(persist& p)
 {
   LBANN_ERROR("TODO: Not yet implemented.");
 }
-void ExecutionContext::save_to_checkpoint_distributed(persist& p)
+void KFACExecutionContext::save_to_checkpoint_distributed(persist& p)
 {
   LBANN_ERROR("TODO: Not yet implemented.");
 }
-void ExecutionContext::load_from_checkpoint_distributed(persist& p)
+void KFACExecutionContext::load_from_checkpoint_distributed(persist& p)
 {
   LBANN_ERROR("TODO: Not yet implemented.");
 }
diff --git a/src/execution_algorithms/ltfb.cpp b/src/execution_algorithms/ltfb.cpp
index ff5eede8126..dd38158a803 100644
--- a/src/execution_algorithms/ltfb.cpp
+++ b/src/execution_algorithms/ltfb.cpp
@@ -38,7 +38,7 @@
 
 namespace lbann {
 
-void LTFB::apply(execution_context& context,
+void LTFB::apply(ExecutionContext& context,
                  model& m,
                  data_coordinator& dc,
                  execution_mode /*mode*/)
@@ -90,7 +90,7 @@ lbann::make<lbann::LTFB>(google::protobuf::Message const& msg_in)
   auto const& stopping = params.stopping_criteria();
   return make_unique<LTFB>(
     msg.name(),
-    make_abstract<training_algorithm>(params.local_training_algorithm()),
+    make_abstract<TrainingAlgorithm>(params.local_training_algorithm()),
     make_abstract<ltfb::MetaLearningStrategy>(params.meta_learning_strategy()),
-    ltfb::TerminationCriteria{stopping.max_tournaments()});
+    ltfb::LTFBTerminationCriteria{stopping.max_tournaments()});
 }
diff --git a/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp b/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp
index ca31cab5d47..7cae0342d21 100644
--- a/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp
+++ b/src/execution_algorithms/ltfb/random_pairwise_exchange.cpp
@@ -164,7 +164,7 @@ RandomPairwiseExchange::RandomPairwiseExchange(
 
 std::unordered_map<std::string, EvalType>
 RandomPairwiseExchange::evaluate_model(model& m,
-                                       ExecutionContext& ctxt,
+                                       LTFBExecutionContext& ctxt,
                                        data_coordinator& dc) const
 {
   // Make sure data readers finish asynchronous work
@@ -270,7 +270,7 @@ bool RandomPairwiseExchange::local_is_better(
 }
 
 void RandomPairwiseExchange::select_next(model& m,
-                                         ltfb::ExecutionContext& ctxt,
+                                         ltfb::LTFBExecutionContext& ctxt,
                                          data_coordinator& dc) const
 {
   auto const& comm = *(m.get_comm());
diff --git a/src/execution_algorithms/ltfb/regularized_evolution.cpp b/src/execution_algorithms/ltfb/regularized_evolution.cpp
index 2d94ce122d3..d785e6b5bd0 100644
--- a/src/execution_algorithms/ltfb/regularized_evolution.cpp
+++ b/src/execution_algorithms/ltfb/regularized_evolution.cpp
@@ -75,7 +75,7 @@ RegularizedEvolution::RegularizedEvolution(RegularizedEvolution const& other)
 {}
 
 EvalType RegularizedEvolution::evaluate_model(model& m,
-                                              ExecutionContext& ctxt,
+                                              LTFBExecutionContext& ctxt,
                                               data_coordinator& dc) const
 
 {
@@ -129,7 +129,7 @@ EvalType RegularizedEvolution::evaluate_model(model& m,
 }
 
 void RegularizedEvolution::select_next(model& m,
-                                       ltfb::ExecutionContext& ctxt,
+                                       ltfb::LTFBExecutionContext& ctxt,
                                        data_coordinator& dc) const
 {
   auto const& comm = *(m.get_comm());
diff --git a/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp b/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp
index b1877f13d10..c6b583c41d2 100644
--- a/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp
+++ b/src/execution_algorithms/ltfb/truncation_selection_exchange.cpp
@@ -94,7 +94,7 @@ TruncationSelectionExchange::TruncationSelectionExchange(
 {}
 
 EvalType TruncationSelectionExchange::evaluate_model(model& m,
-                                                     ExecutionContext& ctxt,
+                                                     LTFBExecutionContext& ctxt,
                                                      data_coordinator& dc) const
 {
   // Make sure data readers finish asynchronous work
@@ -147,7 +147,7 @@ EvalType TruncationSelectionExchange::evaluate_model(model& m,
 }
 
 void TruncationSelectionExchange::select_next(model& m,
-                                              ltfb::ExecutionContext& ctxt,
+                                              ltfb::LTFBExecutionContext& ctxt,
                                               data_coordinator& dc) const
 {
   auto const& comm = *(m.get_comm());
diff --git a/src/execution_contexts/sgd_execution_context.cpp b/src/execution_algorithms/sgd_execution_context.cpp
similarity index 77%
rename from src/execution_contexts/sgd_execution_context.cpp
rename to src/execution_algorithms/sgd_execution_context.cpp
index 31b45db8127..68ceb800f75 100644
--- a/src/execution_contexts/sgd_execution_context.cpp
+++ b/src/execution_algorithms/sgd_execution_context.cpp
@@ -1,5 +1,5 @@
 ////////////////////////////////////////////////////////////////////////////////
-// Copyright (c) 2014-2016, Lawrence Livermore National Security, LLC.
+// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
@@ -24,7 +24,7 @@
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
 
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/base.hpp"
 #include "lbann/io/persist_impl.hpp"
 #include "lbann/trainers/trainer.hpp"
@@ -32,15 +32,15 @@
 
 namespace lbann {
 
-sgd_execution_context::sgd_execution_context(execution_mode mode,
+SGDExecutionContext::SGDExecutionContext(execution_mode mode,
                                              size_t mini_batch_size)
   : m_current_mini_batch_size(mini_batch_size),
     m_effective_mini_batch_size(mini_batch_size), m_execution_mode(mode)
 {}
 
-template <class Archive> void sgd_execution_context::serialize(Archive& ar)
+template <class Archive> void SGDExecutionContext::serialize(Archive& ar)
 {
-  ar(cereal::base_class<execution_context>(this),
+  ar(cereal::base_class<ExecutionContext>(this),
      CEREAL_NVP(m_epoch),
      CEREAL_NVP(m_current_mini_batch_size),
      CEREAL_NVP(m_effective_mini_batch_size),
@@ -51,10 +51,10 @@ template <class Archive> void sgd_execution_context::serialize(Archive& ar)
 // Checkpointing
 ////////////////////////////////////////////////////////////
 
-void sgd_execution_context::save_to_checkpoint_shared(persist& p)
+void SGDExecutionContext::save_to_checkpoint_shared(persist& p)
 {
   if (get_trainer().get_comm()->am_trainer_master()) {
-    write_cereal_archive<sgd_execution_context>(*this,
+    write_cereal_archive<SGDExecutionContext>(*this,
                                                 p,
                                                 get_execution_mode(),
 #ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
@@ -67,9 +67,9 @@ void sgd_execution_context::save_to_checkpoint_shared(persist& p)
   return;
 }
 
-void sgd_execution_context::load_from_checkpoint_shared(persist& p)
+void SGDExecutionContext::load_from_checkpoint_shared(persist& p)
 {
-  load_from_shared_cereal_archive<sgd_execution_context>(
+  load_from_shared_cereal_archive<SGDExecutionContext>(
     *this,
     p,
     get_execution_mode(),
@@ -83,9 +83,9 @@ void sgd_execution_context::load_from_checkpoint_shared(persist& p)
   return;
 }
 
-void sgd_execution_context::save_to_checkpoint_distributed(persist& p)
+void SGDExecutionContext::save_to_checkpoint_distributed(persist& p)
 {
-  write_cereal_archive<sgd_execution_context>(*this,
+  write_cereal_archive<SGDExecutionContext>(*this,
                                               p,
                                               get_execution_mode(),
 #ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
@@ -97,9 +97,9 @@ void sgd_execution_context::save_to_checkpoint_distributed(persist& p)
   return;
 }
 
-void sgd_execution_context::load_from_checkpoint_distributed(persist& p)
+void SGDExecutionContext::load_from_checkpoint_distributed(persist& p)
 {
-  read_cereal_archive<sgd_execution_context>(*this,
+  read_cereal_archive<SGDExecutionContext>(*this,
                                              p,
                                              get_execution_mode(),
 #ifdef LBANN_HAS_CEREAL_XML_ARCHIVES
@@ -111,10 +111,10 @@ void sgd_execution_context::load_from_checkpoint_distributed(persist& p)
   return;
 }
 
-std::string sgd_execution_context::get_type() const { return "sgd"; }
+std::string SGDExecutionContext::get_type() const { return "sgd"; }
 
-bool seconds_termination_criteria::is_done(
-  sgd_execution_context const& c) const noexcept
+bool SecondsTerminationCriteria::is_done(
+  SGDExecutionContext const& c) const noexcept
 {
   auto const& comm = *(get_const_trainer().get_comm());
   int stop = (comm.am_trainer_master() &&
@@ -126,5 +126,5 @@ bool seconds_termination_criteria::is_done(
 
 } // namespace lbann
 
-#define LBANN_CLASS_NAME sgd_execution_context
+#define LBANN_CLASS_NAME SGDExecutionContext
 #include <lbann/macros/register_class_with_cereal.hpp>
diff --git a/src/execution_algorithms/sgd_training_algorithm.cpp b/src/execution_algorithms/sgd_training_algorithm.cpp
index 10aff824d28..f932811dfdc 100644
--- a/src/execution_algorithms/sgd_training_algorithm.cpp
+++ b/src/execution_algorithms/sgd_training_algorithm.cpp
@@ -28,7 +28,7 @@
 
 #include "lbann/base.hpp"
 #include "lbann/callbacks/callback.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/models/model.hpp"
 #include "lbann/utils/memory.hpp"
 
@@ -40,20 +40,20 @@
 
 namespace lbann {
 
-sgd_training_algorithm::sgd_training_algorithm(
-  sgd_training_algorithm const& other)
+SGDTrainingAlgorithm::SGDTrainingAlgorithm(
+  SGDTrainingAlgorithm const& other)
   : BaseType(other.get_name()),
     m_stopping_criteria{other.m_stopping_criteria->clone()},
     m_validation_context{execution_mode::validation, 1UL},
     m_validation_epochs{1UL}
 {}
 
-sgd_training_algorithm&
-sgd_training_algorithm::operator=(sgd_training_algorithm const& other)
+SGDTrainingAlgorithm&
+SGDTrainingAlgorithm::operator=(SGDTrainingAlgorithm const& other)
 {
   BaseType::operator=(other);
   m_stopping_criteria = other.m_stopping_criteria->clone();
-  m_validation_context = sgd_execution_context{execution_mode::validation, 1UL};
+  m_validation_context = SGDExecutionContext{execution_mode::validation, 1UL};
   m_validation_epochs = 1UL;
   return *this;
 }
@@ -62,14 +62,14 @@ sgd_training_algorithm::operator=(sgd_training_algorithm const& other)
 // Evaluation and training
 ////////////////////////////////////////////////////////////
 
-void sgd_training_algorithm::apply(execution_context& context,
+void SGDTrainingAlgorithm::apply(ExecutionContext& context,
                                    model& model,
                                    data_coordinator& dc,
                                    execution_mode mode)
 {
-  sgd_execution_context& sgd_context =
-    dynamic_cast<sgd_execution_context&>(context);
-  const sgd_termination_criteria& sgd_term = *m_stopping_criteria;
+  SGDExecutionContext& sgd_context =
+    dynamic_cast<SGDExecutionContext&>(context);
+  const SGDTerminationCriteria& sgd_term = *m_stopping_criteria;
   switch (mode) {
   case execution_mode::training:
     train(sgd_context, model, dc, sgd_term);
@@ -84,10 +84,10 @@ void sgd_training_algorithm::apply(execution_context& context,
   }
 }
 
-void sgd_training_algorithm::train(sgd_execution_context& c,
+void SGDTrainingAlgorithm::train(SGDExecutionContext& c,
                                    model& model,
                                    data_coordinator& dc,
-                                   sgd_termination_criteria const& term)
+                                   SGDTerminationCriteria const& term)
 {
   auto& evaluation_context = m_validation_context;
   auto& num_validation_epochs = m_validation_epochs;
@@ -137,7 +137,7 @@ void sgd_training_algorithm::train(sgd_execution_context& c,
                  model,
                  dc,
                  execution_mode::validation,
-                 epoch_termination_criteria(num_validation_epochs));
+                 EpochTerminationCriteria(num_validation_epochs));
         ++num_validation_epochs;
 
         // FIXME (trb 06/07/21): The early stopping callback is part
@@ -164,7 +164,7 @@ void sgd_training_algorithm::train(sgd_execution_context& c,
 ////////////////////////////////////////////////////////////
 
 // Returns "true" if the data_coordinator detects the end of an epoch.
-bool sgd_training_algorithm::train_mini_batch(sgd_execution_context& c,
+bool SGDTrainingAlgorithm::train_mini_batch(SGDExecutionContext& c,
                                               model& model,
                                               data_coordinator& dc)
 {
@@ -219,11 +219,11 @@ bool sgd_training_algorithm::train_mini_batch(sgd_execution_context& c,
   return finished;
 }
 
-void sgd_training_algorithm::evaluate(sgd_execution_context& c,
+void SGDTrainingAlgorithm::evaluate(SGDExecutionContext& c,
                                       model& model,
                                       data_coordinator& dc,
                                       execution_mode mode,
-                                      sgd_termination_criteria const& term)
+                                      SGDTerminationCriteria const& term)
 {
   /// @todo BVE FIXME this state needs to be set for inference-only
   /// workflows -- however, if the model will bail due to a lack of a
@@ -251,7 +251,7 @@ void sgd_training_algorithm::evaluate(sgd_execution_context& c,
   do_evaluate_end_cbs(model, mode);
 }
 
-bool sgd_training_algorithm::evaluate_mini_batch(sgd_execution_context& c,
+bool SGDTrainingAlgorithm::evaluate_mini_batch(SGDExecutionContext& c,
                                                  model& model,
                                                  data_coordinator& dc,
                                                  execution_mode mode)
@@ -282,21 +282,21 @@ bool sgd_training_algorithm::evaluate_mini_batch(sgd_execution_context& c,
 // Callbacks
 ////////////////////////////////////////////////////////////
 
-void sgd_training_algorithm::do_train_begin_cbs(model& model)
+void SGDTrainingAlgorithm::do_train_begin_cbs(model& model)
 {
   for (const auto& cb : model.get_callbacks()) {
     cb->on_train_begin(&model);
   }
 }
 
-void sgd_training_algorithm::do_train_end_cbs(model& model)
+void SGDTrainingAlgorithm::do_train_end_cbs(model& model)
 {
   for (const auto& cb : model.get_callbacks()) {
     cb->on_train_end(&model);
   }
 }
 
-void sgd_training_algorithm::do_evaluate_begin_cbs(model& model,
+void SGDTrainingAlgorithm::do_evaluate_begin_cbs(model& model,
                                                    execution_mode mode)
 {
   for (const auto& cb : model.get_callbacks()) {
@@ -316,7 +316,7 @@ void sgd_training_algorithm::do_evaluate_begin_cbs(model& model,
   }
 }
 
-void sgd_training_algorithm::do_evaluate_end_cbs(model& model,
+void SGDTrainingAlgorithm::do_evaluate_end_cbs(model& model,
                                                  execution_mode mode)
 {
   for (const auto& cb : model.get_callbacks()) {
@@ -336,25 +336,25 @@ void sgd_training_algorithm::do_evaluate_end_cbs(model& model,
   }
 }
 
-void sgd_training_algorithm::do_epoch_begin_cbs(model& model)
+void SGDTrainingAlgorithm::do_epoch_begin_cbs(model& model)
 {
   for (const auto& cb : model.get_callbacks()) {
     cb->on_epoch_begin(&model);
   }
 }
 
-void sgd_training_algorithm::do_epoch_end_cbs(model& model)
+void SGDTrainingAlgorithm::do_epoch_end_cbs(model& model)
 {
   for (const auto& cb : model.get_callbacks()) {
     cb->on_epoch_end(&model);
   }
 }
 
-void sgd_training_algorithm::do_batch_begin_cbs(model& model,
+void SGDTrainingAlgorithm::do_batch_begin_cbs(model& model,
                                                 execution_mode mode)
 {
-  sgd_execution_context& c =
-    static_cast<sgd_execution_context&>(model.get_execution_context());
+  SGDExecutionContext& c =
+    static_cast<SGDExecutionContext&>(model.get_execution_context());
 
   for (const auto& cb : model.get_callbacks()) {
     switch (mode) {
@@ -374,10 +374,10 @@ void sgd_training_algorithm::do_batch_begin_cbs(model& model,
   }
 }
 
-void sgd_training_algorithm::do_batch_end_cbs(model& model, execution_mode mode)
+void SGDTrainingAlgorithm::do_batch_end_cbs(model& model, execution_mode mode)
 {
-  sgd_execution_context& c =
-    static_cast<sgd_execution_context&>(model.get_execution_context());
+  SGDExecutionContext& c =
+    static_cast<SGDExecutionContext&>(model.get_execution_context());
 
   for (const auto& cb : model.get_callbacks()) {
     switch (mode) {
@@ -397,18 +397,18 @@ void sgd_training_algorithm::do_batch_end_cbs(model& model, execution_mode mode)
   }
 }
 
-std::string sgd_training_algorithm::get_type() const { return "sgd"; }
+std::string SGDTrainingAlgorithm::get_type() const { return "sgd"; }
 
-sgd_execution_context*
-sgd_training_algorithm::do_get_new_execution_context() const
+SGDExecutionContext*
+SGDTrainingAlgorithm::do_get_new_execution_context() const
 {
-  return new sgd_execution_context(execution_mode::invalid, 0);
+  return new SGDExecutionContext(execution_mode::invalid, 0);
 }
 } // namespace lbann
 
 template <>
-std::unique_ptr<lbann::sgd_training_algorithm>
-lbann::make<lbann::sgd_training_algorithm>(
+std::unique_ptr<lbann::SGDTrainingAlgorithm>
+lbann::make<lbann::SGDTrainingAlgorithm>(
   google::protobuf::Message const& msg_in)
 {
   auto const& params =
@@ -418,24 +418,24 @@ lbann::make<lbann::sgd_training_algorithm>(
   LBANN_ASSERT(params.parameters().UnpackTo(&sgd_params));
 
   auto const& stopping_criteria = sgd_params.stopping_criteria();
-  std::unique_ptr<lbann::sgd_termination_criteria> stopping;
+  std::unique_ptr<SGDTerminationCriteria> stopping;
   switch (stopping_criteria.criterion_case()) {
   case lbann_data::SGD::TerminationCriteria::kMaxBatches:
-    stopping = lbann::make_unique<lbann::batch_termination_criteria>(
+    stopping = make_unique<BatchTerminationCriteria>(
       stopping_criteria.max_batches());
     break;
   case lbann_data::SGD::TerminationCriteria::kMaxEpochs:
-    stopping = lbann::make_unique<lbann::epoch_termination_criteria>(
+    stopping = make_unique<EpochTerminationCriteria>(
       stopping_criteria.max_epochs());
     break;
   case lbann_data::SGD::TerminationCriteria::kMaxSeconds:
-    stopping = lbann::make_unique<lbann::seconds_termination_criteria>(
+    stopping = make_unique<SecondsTerminationCriteria>(
       stopping_criteria.max_seconds());
     //LBANN_ERROR("Time-based training not yet supported in SGD.");
     break;
   default:
     LBANN_ERROR("No stopping criteria specified.");
   }
-  return make_unique<sgd_training_algorithm>(params.name(),
+  return make_unique<SGDTrainingAlgorithm>(params.name(),
                                              std::move(stopping));
 }
diff --git a/src/execution_algorithms/training_algorithm.cpp b/src/execution_algorithms/training_algorithm.cpp
index 1641b75eb39..5ceb123a7d5 100644
--- a/src/execution_algorithms/training_algorithm.cpp
+++ b/src/execution_algorithms/training_algorithm.cpp
@@ -34,16 +34,16 @@
 
 namespace lbann {
 
-training_algorithm::training_algorithm(std::string name)
+TrainingAlgorithm::TrainingAlgorithm(std::string name)
   : m_name{std::move(name)}
 {}
 
-std::string const& training_algorithm::get_name() const noexcept
+std::string const& TrainingAlgorithm::get_name() const noexcept
 {
   return m_name;
 }
 
-void training_algorithm::setup_models(
+void TrainingAlgorithm::setup_models(
   std::vector<observer_ptr<model>> const& models,
   size_t max_mini_batch_size,
   DataReaderMetaData& dr_metadata)
diff --git a/src/execution_algorithms/unit_test/training_algorithm_factory_test.cpp b/src/execution_algorithms/unit_test/training_algorithm_factory_test.cpp
index 8357a331883..0956bad5e19 100644
--- a/src/execution_algorithms/unit_test/training_algorithm_factory_test.cpp
+++ b/src/execution_algorithms/unit_test/training_algorithm_factory_test.cpp
@@ -134,9 +134,9 @@ TEST_CASE("Building training algorithm from the factory",
     algo_msg.set_name("my sgd algo");
     algo_msg.mutable_parameters()->PackFrom(sgd_msg);
 
-    auto sgd = lbann::make_abstract<lbann::training_algorithm>(algo_msg);
+    auto sgd = lbann::make_abstract<lbann::TrainingAlgorithm>(algo_msg);
 
-    REQUIRE_NOTHROW(dynamic_cast<lbann::sgd_training_algorithm const&>(*sgd));
+    REQUIRE_NOTHROW(dynamic_cast<lbann::SGDTrainingAlgorithm const&>(*sgd));
 
     REQUIRE(sgd->get_type() == "sgd");
     REQUIRE(sgd->get_name() == "my sgd algo");
@@ -157,7 +157,7 @@ TEST_CASE("Building training algorithm from the factory",
     algo_msg.mutable_parameters()->PackFrom(wrong_msg_type);
 
     REQUIRE_THROWS_WITH(
-      lbann::make_abstract<lbann::training_algorithm>(algo_msg),
+      lbann::make_abstract<lbann::TrainingAlgorithm>(algo_msg),
       Catch::Contains("Unknown id \"TerminationCriteria\" detected"));
   }
 }
diff --git a/src/execution_contexts/CMakeLists.txt b/src/execution_contexts/CMakeLists.txt
deleted file mode 100644
index 14b452d3b93..00000000000
--- a/src/execution_contexts/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Add the source files for this directory
-set_full_path(THIS_DIR_SOURCES
-  execution_context.cpp
-  sgd_execution_context.cpp
-  )
-
-# Propagate the files up the tree
-set(SOURCES "${SOURCES}" "${THIS_DIR_SOURCES}" PARENT_SCOPE)
diff --git a/src/layers/data_type_distconv_adapter.cpp b/src/layers/data_type_distconv_adapter.cpp
index aaa94bf1d01..c132cba655b 100644
--- a/src/layers/data_type_distconv_adapter.cpp
+++ b/src/layers/data_type_distconv_adapter.cpp
@@ -27,7 +27,7 @@
 #include "lbann/layers/data_type_distconv_adapter.hpp"
 #include "lbann/layers/data_type_layer.hpp"
 #include "lbann/models/model.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/trainers/trainer.hpp"
 
 namespace lbann {
diff --git a/src/layers/data_type_layer.cpp b/src/layers/data_type_layer.cpp
index 85e4ca515d6..3990efb74bb 100644
--- a/src/layers/data_type_layer.cpp
+++ b/src/layers/data_type_layer.cpp
@@ -29,7 +29,7 @@
 
 #include "matrix_builder.hpp"
 
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/layers/data_type_layer.hpp"
 #include "lbann/models/model.hpp"
 #include "lbann/trainers/trainer.hpp"
@@ -99,7 +99,7 @@ void data_type_layer<InputTensorDataType, OutputTensorDataType>::forward_prop()
   }
 
   // Setup tensors
-  const auto& c = static_cast<sgd_execution_context&>(m_model->get_execution_context());
+  const auto& c = static_cast<SGDExecutionContext&>(m_model->get_execution_context());
   const auto& mini_batch_size = c.get_current_mini_batch_size();
   fp_setup_inputs(mini_batch_size);
   fp_setup_outputs(mini_batch_size);
@@ -138,7 +138,7 @@ void data_type_layer<InputTensorDataType, OutputTensorDataType>::back_prop_impl_
   const auto bp_start = get_time();
 
   // Setup tensors
-  const auto& c = static_cast<sgd_execution_context&>(
+  const auto& c = static_cast<SGDExecutionContext&>(
     m_model->get_execution_context());
   const auto& mini_batch_size = c.get_current_mini_batch_size();
   bp_setup_gradient_wrt_inputs(mini_batch_size);
diff --git a/src/layers/io/input_layer.cpp b/src/layers/io/input_layer.cpp
index 0754ff5aa56..aa7eb8f4488 100644
--- a/src/layers/io/input_layer.cpp
+++ b/src/layers/io/input_layer.cpp
@@ -28,8 +28,8 @@
 #include "lbann/layers/io/input_layer.hpp"
 
 #include "lbann/callbacks/imcomm.hpp"
-#include "lbann/execution_contexts/execution_context.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/utils/profiling.hpp"
 #include "lbann/utils/serialize.hpp"
 
@@ -70,7 +70,7 @@ void input_layer<TensorDataType, T_layout, Dev>::fp_setup_outputs(El::Int mini_b
   /// During model setup there is no valid execution context, but
   /// during execution there is a context
   if(this->m_model->has_valid_execution_context()) {
-    auto& c = dynamic_cast<sgd_execution_context&>(this->m_model->get_execution_context());
+    auto& c = dynamic_cast<SGDExecutionContext&>(this->m_model->get_execution_context());
     auto mode = c.get_execution_mode();
     auto effective_mini_batch_size = mini_batch_size;
     if (!(mode==execution_mode::inference)) {
diff --git a/src/layers/layer.cpp b/src/layers/layer.cpp
index 95e50606404..9dce9b8f307 100644
--- a/src/layers/layer.cpp
+++ b/src/layers/layer.cpp
@@ -24,7 +24,7 @@
 // permissions and limitations under the license.
 ////////////////////////////////////////////////////////////////////////////////
 
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/io/file_io.hpp"
 #include "lbann/io/persist.hpp"
 #include "lbann/layers/layer.hpp"
diff --git a/src/layers/transform/evaluation.cpp b/src/layers/transform/evaluation.cpp
index de5b155a183..34a6daba6f3 100644
--- a/src/layers/transform/evaluation.cpp
+++ b/src/layers/transform/evaluation.cpp
@@ -28,7 +28,7 @@
 #include "lbann/comm_impl.hpp"
 #include "lbann/layers/transform/evaluation.hpp"
 #include "lbann/models/model.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/utils/exception.hpp"
 #include "lbann/utils/hydrogen_utils.hpp"
 #ifdef LBANN_HAS_GPU
@@ -252,7 +252,7 @@ void abstract_evaluation_layer<TensorDataType>::fp_compute() {
 
 template <typename TensorDataType>
 void abstract_evaluation_layer<TensorDataType>::bp_compute() {
-  const auto& context = static_cast<sgd_execution_context&>(this->m_model->get_execution_context());
+  const auto& context = static_cast<SGDExecutionContext&>(this->m_model->get_execution_context());
   const auto mini_batch_size = context.get_effective_mini_batch_size();
   El::Fill(this->get_error_signals(), TensorDataType(m_scale / mini_batch_size));
 }
diff --git a/src/models/model.cpp b/src/models/model.cpp
index 6cac99ee59f..ac60c88b696 100644
--- a/src/models/model.cpp
+++ b/src/models/model.cpp
@@ -1985,12 +1985,12 @@ void model::mark_data_store_explicitly_loading(execution_mode mode) {
 
 // At the start of the epoch, set the execution mode and make sure
 // that each layer points to this model
-void model::reset_mode(execution_context& context, execution_mode mode) {
+void model::reset_mode(ExecutionContext& context, execution_mode mode) {
   if (mode == execution_mode::invalid) {
     m_execution_context = nullptr;
     return;
   }
-  m_execution_context = static_cast<observer_ptr<execution_context>>(&context);
+  m_execution_context = static_cast<observer_ptr<ExecutionContext>>(&context);
   //  set_execution_mode(mode);
   for (El::Int i = 0; i < get_num_layers(); ++i) {
     get_layer(i).set_model(this);
diff --git a/src/proto/factories/trainer_factory.cpp b/src/proto/factories/trainer_factory.cpp
index dc1d545784a..c2fd50ddaa6 100644
--- a/src/proto/factories/trainer_factory.cpp
+++ b/src/proto/factories/trainer_factory.cpp
@@ -63,7 +63,7 @@ std::unique_ptr<trainer> construct_trainer(lbann_comm* comm,
     std::move(dc),
     proto_trainer.mini_batch_size(),
     (proto_trainer.has_training_algorithm()
-       ? make_abstract<training_algorithm>(proto_trainer.training_algorithm())
+       ? make_abstract<TrainingAlgorithm>(proto_trainer.training_algorithm())
        : nullptr));
   const auto& name = proto_trainer.name();
   if (!name.empty()) {
diff --git a/src/trainers/trainer.cpp b/src/trainers/trainer.cpp
index e6825597ead..2c16a1e3433 100644
--- a/src/trainers/trainer.cpp
+++ b/src/trainers/trainer.cpp
@@ -33,7 +33,7 @@
 #include "lbann/data_coordinator/data_coordinator_metadata.hpp"
 #include "lbann/execution_algorithms/sgd_training_algorithm.hpp"
 #include "lbann/execution_algorithms/training_algorithm.hpp"
-#include "lbann/execution_contexts/sgd_execution_context.hpp"
+#include "lbann/execution_algorithms/sgd_execution_context.hpp"
 #include "lbann/io/persist_impl.hpp"
 #include "lbann/utils/description.hpp"
 #include "lbann/utils/memory.hpp"
@@ -56,7 +56,7 @@ namespace lbann {
 trainer::trainer(lbann_comm* comm,
                  std::unique_ptr<data_coordinator> dc,
                  size_t mini_batch_size,
-                 std::unique_ptr<training_algorithm> alg)
+                 std::unique_ptr<TrainingAlgorithm> alg)
   : m_data_coordinator{std::move(dc)},
     m_training_alg{std::move(alg)},
     m_comm{comm},
@@ -128,19 +128,19 @@ void trainer::setup(std::unique_ptr<thread_pool> io_thread_pool,
 /// Check if there is already an execution context for the model in this mode,
 /// if not create one
 trainer::execution_context_key_pair_t
-trainer::check_and_build_execution_context(training_algorithm& alg,
+trainer::check_and_build_execution_context(TrainingAlgorithm& alg,
                                            observer_ptr<model> model,
                                            execution_mode mode)
 {
   auto key = std::make_pair(model, mode);
   if (m_model_execution_context.count(key) == 0) {
     /// Create a execution context for each model and execution mode
-    std::unique_ptr<execution_context> context;
-    if (dynamic_cast<observer_ptr<sgd_training_algorithm>>(&alg) != nullptr) {
+    std::unique_ptr<ExecutionContext> context;
+    if (dynamic_cast<observer_ptr<SGDTrainingAlgorithm>>(&alg) != nullptr) {
       /// @todo BVE FIXME Figure out how to get a good mini-batch size
       /// in here
       context =
-        make_unique<sgd_execution_context>(mode, get_max_mini_batch_size());
+        make_unique<SGDExecutionContext>(mode, get_max_mini_batch_size());
     }
     else {
       LBANN_ERROR("Unknown execution algorithm type.");
@@ -153,18 +153,18 @@ trainer::check_and_build_execution_context(training_algorithm& alg,
 /// Check if there is already an execution context for the model in this mode,
 /// if not create one
 trainer::execution_context_key_pair_t
-trainer::check_and_build_execution_context(execution_context& c,
+trainer::check_and_build_execution_context(ExecutionContext& c,
                                            model& model,
                                            execution_mode mode)
 {
   auto key = std::make_pair(&model, mode);
   if (m_model_execution_context.count(key) == 0) {
-    std::unique_ptr<execution_context> context;
+    std::unique_ptr<ExecutionContext> context;
     //    observer_ptr<training_algorithm> alg = const_cast
-    if (dynamic_cast<observer_ptr</*const */ sgd_execution_context>>(&c) !=
+    if (dynamic_cast<observer_ptr</*const */ SGDExecutionContext>>(&c) !=
         nullptr) {
       context =
-        make_unique<sgd_execution_context>(mode, get_max_mini_batch_size());
+        make_unique<SGDExecutionContext>(mode, get_max_mini_batch_size());
     }
     else {
       LBANN_ERROR("Unknown execution context type");
@@ -174,20 +174,20 @@ trainer::check_and_build_execution_context(execution_context& c,
   return key;
 }
 
-execution_context& trainer::get_execution_context(observer_ptr<model> model,
+ExecutionContext& trainer::get_execution_context(observer_ptr<model> model,
                                                   execution_mode mode)
 {
   auto key = std::make_pair(model, mode);
   return get_execution_context(key);
 }
 
-execution_context&
+ExecutionContext&
 trainer::get_execution_context(execution_context_key_pair_t key)
 {
   if (m_model_execution_context.count(key) == 0) {
     LBANN_ERROR("No execution context for this model / mode pair");
   }
-  return static_cast<sgd_execution_context&>(
+  return static_cast<SGDExecutionContext&>(
     *(m_model_execution_context[key].get()));
 }
 
@@ -204,7 +204,7 @@ void trainer::delete_execution_context(execution_context_key_pair_t key)
 /// @todo BVE FIXME seems like there is a bug here about mapping
 /// execution contexts to the right model
 void trainer::for_each_execution_context(
-  std::function<void(observer_ptr<execution_context>)> fn)
+  std::function<void(observer_ptr<ExecutionContext>)> fn)
 {
   for (auto&& c : m_model_execution_context) {
     // auto&& model = c.first.first;
@@ -225,13 +225,13 @@ void trainer::train(observer_ptr<model> model,
   // FIXME (trb 04/22/21): This is a temporary fix to support old PFE
   // model descriptions.
   if (!m_training_alg) {
-    std::unique_ptr<sgd_termination_criteria> stopping;
+    std::unique_ptr<SGDTerminationCriteria> stopping;
     if (num_epochs)
-      stopping = make_unique<epoch_termination_criteria>(num_epochs);
+      stopping = make_unique<EpochTerminationCriteria>(num_epochs);
     else
-      stopping = make_unique<batch_termination_criteria>(num_batches);
+      stopping = make_unique<BatchTerminationCriteria>(num_batches);
 
-    m_training_alg = std::make_unique<sgd_training_algorithm>(
+    m_training_alg = std::make_unique<SGDTrainingAlgorithm>(
       "sgd_train", std::move(stopping));
   }
   DataReaderMetaData dr_metadata = get_data_coordinator().get_dr_metadata();
@@ -264,9 +264,9 @@ void trainer::evaluate(observer_ptr<model> model,
                        execution_mode mode,
                        El::Int num_batches)
 {
-  auto sgd = make_unique<sgd_training_algorithm>(
+  auto sgd = make_unique<SGDTrainingAlgorithm>(
     "sgd_evaluate",
-    make_unique<epoch_termination_criteria>(/*num_epochs=*/1UL));
+    make_unique<EpochTerminationCriteria>(/*num_epochs=*/1UL));
   auto ctxt = sgd->get_new_execution_context();
   ctxt->set_execution_mode(mode);
   model->reset_mode(*ctxt, execution_mode::invalid);
@@ -277,7 +277,7 @@ void trainer::evaluate(observer_ptr<model> model,
   if (m_comm->get_grid_type() == GridType::NO_GRID or
       m_comm->get_grid_type() == GridType::PRIMARY_GRID) {
     sgd->evaluate(*ctxt, *model, get_data_coordinator(), mode,
-                  epoch_termination_criteria(/*num_epochs=*/1UL));
+                  EpochTerminationCriteria(/*num_epochs=*/1UL));
   }
 }
 
@@ -287,7 +287,7 @@ void trainer::evaluate(observer_ptr<model> model,
 
 bool trainer::save_to_checkpoint_shared()
 {
-  for_each_execution_context([this](observer_ptr<execution_context> ctx) {
+  for_each_execution_context([this](observer_ptr<ExecutionContext> ctx) {
     ctx->save_to_checkpoint_shared(this->get_persist_obj());
   });
   save_rng_to_checkpoint_shared(get_persist_obj(), m_comm);
@@ -327,7 +327,7 @@ bool trainer::load_from_checkpoint_shared(persist& p)
   return get_data_coordinator().load_from_checkpoint_shared(p);
 }
 
-bool trainer::load_from_checkpoint_shared(model& m, execution_context& c)
+bool trainer::load_from_checkpoint_shared(model& m, ExecutionContext& c)
 {
   // Reload the RNG once the trainer and all of the  models are setup
   // to avoid spurious turns of the RNGs
@@ -349,7 +349,7 @@ bool trainer::load_from_checkpoint_shared(model& m, execution_context& c)
       else {
         key = check_and_build_execution_context(c, m, mode);
         auto& evaluation_context =
-          static_cast<sgd_execution_context&>(get_execution_context(key));
+          static_cast<SGDExecutionContext&>(get_execution_context(key));
         evaluation_context.load_from_checkpoint_shared(get_persist_obj());
       }
     }
@@ -373,7 +373,7 @@ bool trainer::load_from_checkpoint_shared(model& m, execution_context& c)
 
 bool trainer::save_to_checkpoint_distributed()
 {
-  for_each_execution_context([this](observer_ptr<execution_context> ctx) {
+  for_each_execution_context([this](observer_ptr<ExecutionContext> ctx) {
     ctx->save_to_checkpoint_distributed(this->get_persist_obj());
   });
   save_rng_to_checkpoint_distributed(get_persist_obj(), m_comm);
@@ -393,7 +393,7 @@ bool trainer::load_from_checkpoint_distributed(persist& p)
   return get_data_coordinator().load_from_checkpoint_distributed(p);
 }
 
-bool trainer::load_from_checkpoint_distributed(model& m, execution_context& c)
+bool trainer::load_from_checkpoint_distributed(model& m, ExecutionContext& c)
 {
   load_rng_from_checkpoint(get_persist_obj(), m_comm);
 
@@ -414,7 +414,7 @@ bool trainer::load_from_checkpoint_distributed(model& m, execution_context& c)
       else {
         key = check_and_build_execution_context(c, m, mode);
         auto& evaluation_context =
-          static_cast<sgd_execution_context&>(get_execution_context(key));
+          static_cast<SGDExecutionContext&>(get_execution_context(key));
         evaluation_context.load_from_checkpoint_distributed(get_persist_obj());
       }
     }

From c88901ee9d2f9fd375dd380d6e219eb9bad12fd1 Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Mon, 1 Nov 2021 23:37:20 -0700
Subject: [PATCH 28/37] Bugfix for polynomial learning rate schedule (#1984)

* Make polynomial learning rate schedule match Keras

* Add unit test for polynomial learning rate callback

* Use synthetic data reader in test for polynomial decay learning rate schedule

* Make polynomial learning rate schedule match Keras

* Add unit test for polynomial learning rate callback

* Use synthetic data reader in test for polynomial decay learning rate schedule
---
 .../test_unit_callback_poly_learning_rate.py  | 161 ++++++++++++++++++
 include/lbann/callbacks/learning_rate.hpp     |   8 +-
 src/callbacks/learning_rate.cpp               |  27 +--
 3 files changed, 178 insertions(+), 18 deletions(-)
 create mode 100644 bamboo/unit_tests/test_unit_callback_poly_learning_rate.py

diff --git a/bamboo/unit_tests/test_unit_callback_poly_learning_rate.py b/bamboo/unit_tests/test_unit_callback_poly_learning_rate.py
new file mode 100644
index 00000000000..f9b7a401d2c
--- /dev/null
+++ b/bamboo/unit_tests/test_unit_callback_poly_learning_rate.py
@@ -0,0 +1,161 @@
+"""Test to check polynomial decay learning rate schedule.
+
+LBANN is run with the polynomial learning rate schedule and the log
+files are post-processed to make sure that the correct learning rate
+values are used.
+
+"""
+import os
+import os.path
+import random
+import re
+import sys
+
+# Bamboo utilities
+current_file = os.path.realpath(__file__)
+current_dir = os.path.dirname(current_file)
+sys.path.insert(0, os.path.join(os.path.dirname(current_dir), 'common_python'))
+import tools
+
+# ==============================================
+# Learning rate schedule parameters
+# ==============================================
+
+lr_power = 0.8      # Exponent of the polynomial decay (callback `power`)
+lr_num_epochs = 5   # Number of epochs in the schedule (callback `num_epochs`)
+lr_start = 1        # Initial learning rate, passed to the SGD optimizer
+lr_end = 0.1        # Final learning rate (callback `end_lr`)
+
+# ==============================================
+# Setup LBANN experiment
+# ==============================================
+
+def setup_experiment(lbann):
+    """Construct LBANN experiment (trainer, model, data reader, optimizer).
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+    mini_batch_size = 1
+    trainer = lbann.Trainer(mini_batch_size)
+    model = construct_model(lbann)
+    data_reader = construct_data_reader(lbann)
+    optimizer = lbann.SGD(learn_rate=lr_start)
+    return trainer, model, data_reader, optimizer
+
+def construct_model(lbann):
+    """Construct LBANN model with a polynomial learning rate callback.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+
+    # Layer graph
+    x = lbann.Input(data_field='samples')
+    x = lbann.FullyConnected(x, num_neurons=1)
+
+    # Model objects
+    metrics = []
+    callbacks = [
+        lbann.CallbackPolyLearningRate(
+            power=lr_power,
+            num_epochs=lr_num_epochs,
+            end_lr=lr_end,
+        ),
+    ]
+
+    # Construct model (first argument is presumably the epoch count; TODO confirm)
+    return lbann.Model(lr_num_epochs+2,
+                       layers=x,
+                       metrics=metrics,
+                       callbacks=callbacks)
+
+def construct_data_reader(lbann):
+    """Construct Protobuf message for synthetic data reader.
+
+    The reader is configured with the name 'synthetic' and the role
+    'train', with 2 samples and `synth_dimensions` set to '1'.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+    message = lbann.reader_pb2.DataReader()
+    _reader = message.reader.add()
+    _reader.name = 'synthetic'
+    _reader.role = 'train'
+    _reader.num_samples = 2
+    _reader.synth_dimensions = '1'
+    _reader.percent_of_data_to_use = 1.0
+    return message
+
+# ==============================================
+# Setup PyTest
+# ==============================================
+
+def augment_test_func(test_func):
+    """Augment test function to parse log files.
+
+    `tools.create_tests` creates functions that run an LBANN
+    experiment. This function creates augmented functions that parse
+    the log file after LBANN finishes running and check the logged
+    learning rates against the expected polynomial decay schedule.
+
+    Note: The naive approach is to define the augmented test functions
+    in a loop. However, Python closures are late binding, so every
+    function defined in the loop would end up calling the last
+    `test_func`. We avoid this by defining each augmented function in
+    the local scope of this factory, which binds its own `test_func`.
+
+    Args:
+        test_func (function): Test function created by
+            `tools.create_tests`.
+
+    Returns:
+        function: Test that can interact with PyTest.
+
+    """
+    test_name = test_func.__name__
+
+    # Define test function
+    def func(cluster, dirname):
+
+        # Run LBANN experiment
+        experiment_output = test_func(cluster, dirname)
+
+        # Parse LBANN log file
+        lr_list = []
+        log_file = experiment_output['stdout_log_file']
+        with open(log_file) as f:
+            for line in f:
+                match = re.search(
+                    'changing global learning rate to ([0-9.]+)',
+                    line)
+                if match:
+                    lr_list.append(float(match.group(1)))
+
+        # Make sure file has been parsed correctly
+        assert len(lr_list) == lr_num_epochs, \
+            f'Error parsing {log_file} ' \
+            f'(expected {lr_num_epochs} learning rates, ' \
+            f'but found {len(lr_list)})'
+
+        # Make sure learning rates match expected values
+        tol = 1e-5
+        for epoch in range(lr_num_epochs):
+            lr = lr_list[epoch]
+            scale = (1 - (epoch+1)/lr_num_epochs) ** lr_power
+            expected_lr = (lr_start - lr_end) * scale + lr_end
+            assert expected_lr-tol < lr < expected_lr+tol, \
+                f'Incorrect learning rate at epoch {epoch} ' \
+                f'(expected {expected_lr}, but found {lr})'
+
+    # Return test function from factory function
+    func.__name__ = test_name
+    return func
+
+# Create test functions that can interact with PyTest
+for _test_func in tools.create_tests(setup_experiment, __file__,):
+    globals()[_test_func.__name__] = augment_test_func(_test_func)
diff --git a/include/lbann/callbacks/learning_rate.hpp b/include/lbann/callbacks/learning_rate.hpp
index 8973fe34b4e..b7741bd8565 100644
--- a/include/lbann/callbacks/learning_rate.hpp
+++ b/include/lbann/callbacks/learning_rate.hpp
@@ -295,12 +295,10 @@ class poly_learning_rate : public learning_rate {
   size_t m_num_epochs;
   /// The maximum number of iterations until which the learning rate changes
   size_t m_max_iter;
-  /// The minimum learning rate
+  /// The initial learning rate
+  float m_start_lr;
+  /// The final learning rate
   float m_end_lr;
-  /// The current rate to scale the base learning rate
-  float m_lr;
-  /// The learning rate scale used at the end of the last epoch
-  float m_last_epoch_lr;
 };
 
 // Builder function
diff --git a/src/callbacks/learning_rate.cpp b/src/callbacks/learning_rate.cpp
index 5a9e5bc8443..f1c6744eec3 100644
--- a/src/callbacks/learning_rate.cpp
+++ b/src/callbacks/learning_rate.cpp
@@ -251,15 +251,15 @@ poly_learning_rate::poly_learning_rate(
   double p, size_t n_epochs, size_t max_iter)
   : learning_rate(std::vector<std::string>()),
     m_p(p), m_num_epochs(n_epochs), m_max_iter(max_iter),
-    m_end_lr(0.0f),
-    m_lr(1.0f), m_last_epoch_lr(1.0f) {}
+    m_start_lr(0.0f), m_end_lr(0.0f)
+{}
 
 poly_learning_rate::poly_learning_rate(
   double p, size_t n_epochs, size_t max_iter, double end_lr,  std::vector<std::string> weights_names)
   : learning_rate(std::move(weights_names)),
     m_p(p), m_num_epochs(n_epochs), m_max_iter(max_iter),
-    m_end_lr(end_lr),
-    m_lr(1.0f), m_last_epoch_lr(1.0f) {}
+    m_start_lr(0.0f), m_end_lr(end_lr)
+{}
 
 /**
  * Check if the maximum number of iterations is set. If not, compute it by the
@@ -267,6 +267,7 @@ poly_learning_rate::poly_learning_rate(
  */
 void poly_learning_rate::setup(model *m) {
   learning_rate::setup(m);
+  m_start_lr = get_current_global_learning_rate();
   if (m_max_iter == 0ull) {
     data_coordinator& dc = get_trainer().get_data_coordinator();
     m_max_iter = m_num_epochs * dc.get_num_iterations_per_epoch(execution_mode::training);
@@ -277,9 +278,11 @@ void poly_learning_rate::setup(model *m) {
  * Keep the record of the learning rate at the end of the current epoch.
  */
 float poly_learning_rate::global_schedule(model *m) {
-  const float scale = m_lr / m_last_epoch_lr;
-  m_last_epoch_lr = m_lr;
-  return (poly_learning_rate::get_current_global_learning_rate() - m_end_lr) * scale + m_end_lr;
+  const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
+  const size_t iter = std::min(c.get_step(), m_max_iter);
+  const float scale = static_cast<float>(
+    std::pow(static_cast<double>(m_max_iter-iter)/m_max_iter, m_p));
+  return (m_start_lr - m_end_lr) * scale + m_end_lr;
 }
 
 /**
@@ -287,12 +290,10 @@ float poly_learning_rate::global_schedule(model *m) {
  */
 float poly_learning_rate::optimizer_schedule(model *m, optimizer &opt) {
   const auto& c = static_cast<const SGDExecutionContext&>(m->get_execution_context());
-  const size_t cur_iter = c.get_step();
-  if (m_max_iter > cur_iter) {
-    m_lr = static_cast<float>(std::pow(static_cast<double>(m_max_iter - cur_iter)/m_max_iter, m_p));
-  }
-  const float scale = m_lr / m_last_epoch_lr;
-  return (poly_learning_rate::get_current_global_learning_rate() - m_end_lr) * scale + m_end_lr;
+  const size_t iter = std::min(c.get_step(), m_max_iter);
+  const float scale = static_cast<float>(
+    std::pow(static_cast<double>(m_max_iter-iter)/m_max_iter, m_p));
+  return (m_start_lr - m_end_lr) * scale + m_end_lr;
 }
 
 optimizerwise_adaptive_learning_rate::

From 8889fe26f742cb818fbfba6b2b90b818519bbb3a Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Tue, 2 Nov 2021 10:38:22 -0700
Subject: [PATCH 29/37] Fix bug with user-specified random seeds (#1995)

* Correctly use user-specified random seeds

They were previously always set to -1, which was ignored.

* Add environment variable to set random seed
---
 src/proto/proto_common.cpp | 2 +-
 src/utils/options.cpp      | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/proto/proto_common.cpp b/src/proto/proto_common.cpp
index d449dd41d7d..5c90e04e56b 100644
--- a/src/proto/proto_common.cpp
+++ b/src/proto/proto_common.cpp
@@ -887,7 +887,7 @@ void get_cmdline_overrides(const lbann_comm& comm, lbann_data::LbannPB& p)
   if (arg_parser.get<bool>(LBANN_OPTION_DISABLE_CUDA)) {
     model->set_disable_cuda(arg_parser.get<bool>(LBANN_OPTION_DISABLE_CUDA));
   }
-  if (arg_parser.get<int>(LBANN_OPTION_RANDOM_SEED) == -1) {
+  if (arg_parser.get<int>(LBANN_OPTION_RANDOM_SEED) != 0) {
     trainer->set_random_seed(arg_parser.get<int>(LBANN_OPTION_RANDOM_SEED));
   }
   if (arg_parser.get<bool>(LBANN_OPTION_SERIALIZE_IO)) {
diff --git a/src/utils/options.cpp b/src/utils/options.cpp
index eaa91dd9f42..323332518ff 100644
--- a/src/utils/options.cpp
+++ b/src/utils/options.cpp
@@ -194,8 +194,9 @@ void construct_std_options()
                         "");
   arg_parser.add_option(LBANN_OPTION_RANDOM_SEED,
                         {"--random_seed", "--rand_seed"},
-                        "[STD] Value to seed RNG",
-                        -1);
+                        utils::ENV("LBANN_RANDOM_SEED"),
+                        "[STD] RNG seed",
+                        0);
   arg_parser.add_option(LBANN_OPTION_READER, {"--reader"}, "[STD] TODO", "");
   arg_parser.add_option(
     LBANN_OPTION_RESTART_DIR,

From 1b4908efd3e0d27cf720d559ddfd433132276a3c Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Tue, 2 Nov 2021 11:21:09 -0700
Subject: [PATCH 30/37] Debugging Distconv models (#1990)

* Accommodate CosmoFlow distributed IO in data coordinator

* Ignore error signals in distconv identity layer

CosmoFlow now runs.

* Attempt to fix bugs in distconv adapter for input layer

Input layer was recently changed to only output one tensor, instead of outputting all tensors from the data reader.

* Update ExaGAN model to use new input layer API
---
 .../cosmology/ExaGAN/train_distconv_gan.py    |   2 +-
 include/lbann/layers/io/input_layer.hpp       |  26 ++--
 .../buffered_data_coordinator.cpp             |  26 +++-
 src/layers/io/input_layer.cpp                 | 143 ++++++++++--------
 src/proto/proto_common.cpp                    |   1 +
 5 files changed, 119 insertions(+), 79 deletions(-)

diff --git a/applications/physics/cosmology/ExaGAN/train_distconv_gan.py b/applications/physics/cosmology/ExaGAN/train_distconv_gan.py
index aaee573c42a..2eae22a5a24 100644
--- a/applications/physics/cosmology/ExaGAN/train_distconv_gan.py
+++ b/applications/physics/cosmology/ExaGAN/train_distconv_gan.py
@@ -84,7 +84,7 @@ def construct_model(args):
                 depth_groups=args.depth_groups)
 
     g_device = 'GPU'
-    input_ = lbann.Input(name='input', device=g_device)
+    input_ = lbann.Input(name='input', data_field='samples')
     input_ = lbann.Reshape(input_, dims=list2str(_sample_dims),name='in_reshape', device=g_device),
     x1 = lbann.Identity(input_, parallel_strategy=None, name='x1')
     x2 = lbann.Identity(input_, name='x2') if args.compute_mse else None
diff --git a/include/lbann/layers/io/input_layer.hpp b/include/lbann/layers/io/input_layer.hpp
index 8aa75b658b1..6505ba7681b 100644
--- a/include/lbann/layers/io/input_layer.hpp
+++ b/include/lbann/layers/io/input_layer.hpp
@@ -45,15 +45,17 @@ class input_distconv_adapter: public data_type_distconv_adapter<TensorDataType>
   using TensorHost = dc::TensorHost<TensorDataType>;
   using TensorHostShuffler = dc::TensorHostShuffler<TensorDataType>;
 
-  input_distconv_adapter(Layer& layer, const bool shuffle_required);
+  input_distconv_adapter(
+    Layer& layer,
+    data_field_type data_field,
+    const bool shuffle_required);
   virtual ~input_distconv_adapter() = default;
 
-  TensorHostShuffler &get_shuffler(const TensorHost &src, const TensorHost &dst,
-                                   int mat_idx);
+  TensorHostShuffler &get_shuffler(const TensorHost &src, const TensorHost &dst);
   void setup_fp_tensors() override;
   std::unique_ptr<TensorDevType> setup_activations_i(int index) const override;
   dc::Shape get_activations_local_shape(int index) const override;
-  dc::Shape get_activations_shape(int index) const;
+  dc::Shape get_activations_shape(int index) const override;
   void setup_shuffler_buffers(const TensorHost &src, const TensorHost &dst);
 
   // No bp tensors needed for this layer.
@@ -69,15 +71,18 @@ class input_distconv_adapter: public data_type_distconv_adapter<TensorDataType>
   // Nothing to do here as everything is done in fp_compute_distconv.
   void fp_setup(El::Int mini_batch_size) override {}
   void fp_compute();
-  bool is_input_processed(size_t index) const;
 
  private:
-  std::vector<bool> m_is_input_processed;
-  std::vector<std::unique_ptr<TensorHost>> m_original_host_tensors;
-  std::vector<std::unique_ptr<TensorHost>> m_host_tensors;
+
+  /// @brief Data field accessed by corresponding input layer
+  data_field_type m_data_field;
+
+  bool m_is_input_processed;
+  std::unique_ptr<TensorHost> m_original_host_tensor;
+  std::unique_ptr<TensorHost> m_host_tensor;
 
   const bool m_shuffle_required;
-  std::vector<std::array<std::unique_ptr<TensorHostShuffler>, 4>> m_shufflers;
+  std::array<std::unique_ptr<TensorHostShuffler>, 4> m_shufflers;
   std::unique_ptr<TensorDataType> m_shuffler_src_buf;
   size_t m_shuffler_src_buf_size = 0;
   std::unique_ptr<TensorDataType> m_shuffler_dst_buf;
@@ -185,11 +190,12 @@ class input_layer : public data_type_layer<TensorDataType> {
   }
   void setup_distconv_adapter(const DataReaderMetaData& dr_metadata) override {
     this->get_distconv_adapter_ptr() = make_unique<distconv_adapter_type>(
-        *this, dr_metadata.shuffle_required);
+      *this, m_data_field, dr_metadata.shuffle_required);
   }
   distconv_adapter_type& get_distconv_adapter() override;
   const distconv_adapter_type& get_distconv_adapter() const override;
   bool keep_original_outputs(int index) const override;
+  bool keep_original_gradient_wrt_outputs(int index) const override;
 ///@}
 #endif // LBANN_HAS_DISTCONV
 };
diff --git a/src/data_coordinator/buffered_data_coordinator.cpp b/src/data_coordinator/buffered_data_coordinator.cpp
index 43ec0677562..2e09b98aee5 100644
--- a/src/data_coordinator/buffered_data_coordinator.cpp
+++ b/src/data_coordinator/buffered_data_coordinator.cpp
@@ -65,8 +65,6 @@ void buffered_data_coordinator<TensorDataType>::setup_data_fields(
 
 #ifdef LBANN_HAS_DISTCONV
   if (dc::is_cosmoflow_parallel_io_enabled()) {
-    El::Int num_neurons = get_linearized_data_size();
-    num_neurons /= dc::get_number_of_io_partitions();
     // TODO: Make sure that TensorDatType is equivalent to the HDF5
     // data reader's data type (float as default).
     // TensorDataType is assumed to be 2-byte integer types such as
@@ -80,15 +78,33 @@ void buffered_data_coordinator<TensorDataType>::setup_data_fields(
   /// ranks are participating in I/O
   El::Int local_mini_batch_size = max_mini_batch_size / this->m_comm->get_procs_per_trainer();
   El::Int partial_mini_batch_size = max_mini_batch_size % this->m_comm->get_procs_per_trainer();
+  if(partial_mini_batch_size > 0 && this->m_comm->get_rank_in_trainer() < partial_mini_batch_size) {
+    local_mini_batch_size++;
+  }
+
 #ifdef LBANN_HAS_DISTCONV
   if (dc::is_cosmoflow_parallel_io_enabled()) {
+    // Manually resize buffers for CosmoFlow data tensors
     assert_eq(local_mini_batch_size, 1);
     assert_eq(partial_mini_batch_size, 0);
+    El::Int linearized_size = get_linearized_data_size();
+    linearized_size /= dc::get_number_of_io_partitions();
+    for (const auto& buf_map : m_data_buffers) {
+      const data_buffer_map_t& buffer_map = buf_map;
+      for (const auto& [mode, data_buffer] : buffer_map) {
+        auto& input_buffers = data_buffer->m_input_buffers;
+        if (input_buffers.count(INPUT_DATA_TYPE_SAMPLES) > 0
+            && input_buffers[INPUT_DATA_TYPE_SAMPLES]->IsEmpty()) {
+          input_buffers[INPUT_DATA_TYPE_SAMPLES]->Resize(linearized_size,
+                                                         max_mini_batch_size);
+          El::Zeros_seq(data_buffer->m_indices_fetched_per_mb,
+                        local_mini_batch_size,
+                        1);
+        }
+      }
+    }
   }
 #endif // LBANN_HAS_DISTCONV
-  if(partial_mini_batch_size > 0 && this->m_comm->get_rank_in_trainer() < partial_mini_batch_size) {
-    local_mini_batch_size++;
-  }
 
   // Check to see if there are any data fields with unallocated buffers
   for (auto& data_field : m_active_data_fields) {
diff --git a/src/layers/io/input_layer.cpp b/src/layers/io/input_layer.cpp
index aa7eb8f4488..14f935ecb14 100644
--- a/src/layers/io/input_layer.cpp
+++ b/src/layers/io/input_layer.cpp
@@ -182,34 +182,33 @@ void input_layer<T,L,D>::fill_onnx_node(onnx::GraphProto& graph) const
 template <typename TensorDataType,
           data_layout T_layout, El::Device Dev>
 input_distconv_adapter<TensorDataType, T_layout, Dev>::
-input_distconv_adapter(Layer& layer, const bool shuffle_required)
+input_distconv_adapter(
+  Layer& layer,
+  const data_field_type data_field,
+  const bool shuffle_required)
   : data_type_distconv_adapter<TensorDataType>(layer),
-  m_shuffle_required(shuffle_required) {
+    m_data_field(data_field),
+    m_shuffle_required(shuffle_required) {
+
+  // Distconv currently only supports CosmoFlow data
+  if (m_data_field != INPUT_DATA_TYPE_SAMPLES
+      && m_data_field != INPUT_DATA_TYPE_RESPONSES) {
+    LBANN_ERROR(
+      "attempted to create distconv adapter for ",
+      "input layer with unsupported data field (",m_data_field,")");
+  }
+
   // Input data is only processed when its consumer layer is also
   // enabled for distconv
-  for (int i = 0; i < layer.get_num_children(); ++i) {
-    m_is_input_processed.push_back(layer.get_child_layers()[i]->distconv_enabled());
-  }
-  if (m_shuffle_required) {
-    m_shufflers.resize(layer.get_num_children());
-  }
-}
+  m_is_input_processed = layer.get_child_layer().distconv_enabled();
 
-template <typename TensorDataType,
-          data_layout T_layout, El::Device Dev>
-bool input_distconv_adapter<TensorDataType, T_layout, Dev>::
-is_input_processed(size_t index) const {
-  if (index >= m_is_input_processed.size()) {
-    LBANN_ERROR("Invalid index: ", index);
-  }
-  return m_is_input_processed[index];
 }
 
 template <typename TensorDataType,
           data_layout T_layout, El::Device Dev>
 typename input_distconv_adapter<TensorDataType, T_layout, Dev>::TensorHostShuffler&
 input_distconv_adapter<TensorDataType, T_layout, Dev>::get_shuffler(
-    const TensorHost &src, const TensorHost &dst, int mat_idx) {
+    const TensorHost &src, const TensorHost &dst) {
   size_t cur_mb_size = src.get_shape()[dc::get_sample_dim()];
   auto src_buf = m_shuffler_src_buf.get();
   auto dst_buf = m_shuffler_dst_buf.get();
@@ -224,7 +223,7 @@ input_distconv_adapter<TensorDataType, T_layout, Dev>::get_shuffler(
     shfl_idx = 1 + static_cast<int>(mode);
   }
   assert_always(shfl_idx >= 0 && shfl_idx < 4);
-  auto &shfl = m_shufflers[mat_idx][shfl_idx];
+  auto &shfl = m_shufflers[shfl_idx];
   if (shfl == nullptr) {
     shfl = make_unique<TensorHostShuffler>(
         src, dst, src_buf, dst_buf);
@@ -237,10 +236,10 @@ template <typename TensorDataType,
 void input_distconv_adapter<TensorDataType, T_layout, Dev>::setup_fp_tensors() {
   const auto sample_dist = dc::get_hydrogen_data_parallel_distribution(
       dc::get_num_dims(this->layer()));
-  for (int mat_idx = 0; mat_idx < this->layer().get_num_children(); ++mat_idx) {
-    if (!is_input_processed(mat_idx)) continue;
 
-    const auto shape = this->get_activations_shape(mat_idx);
+  if (m_is_input_processed) {
+
+    const auto shape = this->get_activations_shape(0);
     auto local_shape = shape;
     if (m_shuffle_required) {
       local_shape[dc::get_sample_dim()] = 0;
@@ -254,7 +253,7 @@ void input_distconv_adapter<TensorDataType, T_layout, Dev>::setup_fp_tensors() {
     const dc::LocaleMPI loc(dc::get_mpi_comm(), false);
 
     auto dist = this->get_activations_dist();
-    if (mat_idx == 1) {
+    if (m_data_field == INPUT_DATA_TYPE_RESPONSES) {
       // assumes no halo for the ground-truth data
       dist.clear_overlap();
     }
@@ -264,29 +263,29 @@ void input_distconv_adapter<TensorDataType, T_layout, Dev>::setup_fp_tensors() {
     const auto original_host_tensor_dist = m_shuffle_required ?
         sample_dist : dist_no_halo;
     // Create a view to the host LBANN matrix
-    m_original_host_tensors.emplace_back(
-        make_unique<TensorHost>(shape, loc, original_host_tensor_dist, local_shape));
+    m_original_host_tensor
+      = make_unique<TensorHost>(shape, loc, original_host_tensor_dist, local_shape);
 
     // When shuffled, host tensor will have the same distribution as
     // the final output; otherwise, it is just a view to the host
     // LBANN matrix, so no overlap.
     auto host_tensor_dist = m_shuffle_required ? dist : dist_no_halo;
-    m_host_tensors.emplace_back(
-        make_unique<TensorHost>(shape, loc, host_tensor_dist));
+    m_host_tensor
+      = make_unique<TensorHost>(shape, loc, host_tensor_dist);
 
     if (m_shuffle_required) {
       // TODO: This is a temporary hack. Should use
       // CUDAHostPooledAllocator, but the shuffler is
       // only specialized for BaseAllocator.
-      size_t buf_size = m_host_tensors.back()->get_local_real_size()
-          * sizeof(TensorDataType);
+      size_t buf_size = m_host_tensor->get_local_real_size() * sizeof(TensorDataType);
       TensorDataType *buf = nullptr;
       CHECK_CUDA(cudaMallocHost(&buf, buf_size));
       // Note buf should be deallocated.
-      dc::tensor::View(*m_host_tensors.back(), buf);
-      setup_shuffler_buffers(*m_original_host_tensors.back(),
-                             *m_host_tensors.back());
+      dc::tensor::View(*m_host_tensor, buf);
+      setup_shuffler_buffers(*m_original_host_tensor,
+                             *m_host_tensor);
     }
+
   }
 
   this->setup_activations();
@@ -297,12 +296,12 @@ template <typename TensorDataType,
 std::unique_ptr<typename input_distconv_adapter<TensorDataType, T_layout, Dev>::TensorDevType>
 input_distconv_adapter<TensorDataType, T_layout, Dev>::
 setup_activations_i(int index) const {
-  if (!is_input_processed(index)) return nullptr;
-  if (index == 0) {
+  if (!m_is_input_processed) return nullptr;
+  if (m_data_field == INPUT_DATA_TYPE_SAMPLES) {
     return data_type_distconv_adapter<TensorDataType>::
         setup_activations_i(index);
-  } else {
-    assert_eq(index, 1);
+  }
+  else if (m_data_field == INPUT_DATA_TYPE_RESPONSES) {
     // Note: the default setup_activations_i can't be used because
     // the distribution might need to be changed to remove
     // overlap. This can be fixed by making each tensor hav a
@@ -317,6 +316,9 @@ setup_activations_i(int index) const {
     t->zero(hydrogen::cuda::GetDefaultStream());
     return t;
   }
+  else {
+    LBANN_ERROR("unsupported data field (",m_data_field,")");
+  }
 }
 
 template <typename TensorDataType,
@@ -332,24 +334,28 @@ template <typename TensorDataType,
           data_layout T_layout, El::Device Dev>
 dc::Shape input_distconv_adapter<TensorDataType, T_layout, Dev>::
 get_activations_shape(int index) const {
-  if (index == 0) {
+  if (m_data_field == INPUT_DATA_TYPE_SAMPLES) {
     return data_type_distconv_adapter<TensorDataType>::
         get_activations_shape(index);
-  } else {
-    assert_eq(index, 1);
+  }
+  else if (m_data_field == INPUT_DATA_TYPE_RESPONSES) {
     // TODO: This is a temporary hack. The label tensor shape should
     //be set based on the shape set by the data reader, but the data
     //reader does not provide it. Using the shape shape as the data
     //tensor works fine for the U-Net model.
-    auto shape = this->get_activations_shape(0);
+    auto shape = data_type_distconv_adapter<TensorDataType>::
+      get_activations_shape(0); /// @todo Should this be getting shape corresponding to INPUT_DATA_TYPE_SAMPLES?
     auto label_size = data_type_distconv_adapter<TensorDataType>::
-        get_activations_shape(1).reduce_prod();
+        get_activations_shape(0).reduce_prod();
     const std::string env = std::getenv("DISTCONV_LABEL_NUM_CHANNELS");
     auto num_channels = env != ""
         ? std::stoi(env) : label_size / shape.reduce_prod();
     shape[-2] = num_channels;
     return shape;
   }
+  else {
+    LBANN_ERROR("unsupported data field (",m_data_field,")");
+  }
 }
 
 template <typename TensorDataType,
@@ -377,7 +383,7 @@ template <typename TensorDataType,
 bool input_distconv_adapter<TensorDataType, T_layout, Dev>::
 child_copy_required(size_t output_index) const {
   // Not required when label is not handled.
-  if (output_index == 1 && !is_input_processed(1)) {
+  if (m_data_field == INPUT_DATA_TYPE_RESPONSES && !m_is_input_processed) {
     return false;
   } else {
     return data_type_distconv_adapter<TensorDataType>::
@@ -390,7 +396,7 @@ template <typename TensorDataType,
 bool input_distconv_adapter<TensorDataType, T_layout, Dev>::
 child_shuffle_required(size_t output_index) const {
   // Not required when label is not handled.
-  if (output_index == 1 && !is_input_processed(1)) {
+  if (m_data_field == INPUT_DATA_TYPE_RESPONSES && !m_is_input_processed) {
     return false;
   } else {
     return data_type_distconv_adapter<TensorDataType>::
@@ -410,18 +416,17 @@ void input_distconv_adapter<TensorDataType, T_layout, Dev>::fp_compute() {
   const int mb_size = static_cast<sgd_execution_context&>(
       l.get_model()->get_execution_context()).get_current_mini_batch_size();
 
-  for (int mat_idx = 0; mat_idx < l.get_num_children(); ++mat_idx) {
-    if (!is_input_processed(mat_idx)) continue;
+  if (m_is_input_processed) {
 
     // TODO: This is diabled as it raises an error when the HDF5 data
     // reader with hyperslab labels is used. Remove this assertion or
-    // reshape the actiavtion tensor (mat_idx=1).
+    // reshape the actiavtion tensor (data_field = RESPONSES).
     // assert_eq(mb_size * dc::get_number_of_io_partitions(),
-    //           l.get_activations(mat_idx).Width());
+    //           l.get_activations().Width());
 
-    auto &original_tensor = *m_original_host_tensors[mat_idx];
-    auto &host_tensor = *m_host_tensors[mat_idx];
-    auto &device_tensor = this->get_activations(mat_idx);
+    auto& original_tensor = *m_original_host_tensor;
+    auto& host_tensor = *m_host_tensor;
+    auto& device_tensor = this->get_activations();
 
     // Adjust the mini-batch size
     original_tensor.set_outermost_dimension(mb_size);
@@ -429,33 +434,35 @@ void input_distconv_adapter<TensorDataType, T_layout, Dev>::fp_compute() {
     device_tensor.set_outermost_dimension(mb_size);
 
     // Setup view
-    assert0(dc::tensor::View(
+    assert0(
+      dc::tensor::View(
         original_tensor,
-        l.get_activations(mat_idx).LockedBuffer()));
+        l.get_activations().LockedBuffer()));
 
     // Shuffle if necessary
     if (m_shuffle_required) {
       get_shuffler(
-          original_tensor, host_tensor, mat_idx).shuffle_forward(
-              original_tensor.get_const_base_ptr(),
-              host_tensor.get_base_ptr());
-    } else {
+        original_tensor, host_tensor).shuffle_forward(
+          original_tensor.get_const_base_ptr(),
+          host_tensor.get_base_ptr());
+    }
+    else {
       // The input buffer is already partitioned
-      assert0(dc::tensor::View(
+      assert0(
+        dc::tensor::View(
           host_tensor, original_tensor.get_const_buffer()));
     }
 
     // After this, there is no inter-process communication, so it's
     // safe to exit if the local tensor is empty.
-    if (host_tensor.get_local_size() == 0) {
-      continue;
+    if (host_tensor.get_local_size() > 0) {
+      prof_region_begin("copy-to-device", prof_colors[1], false);
+      assert0(dc::tensor::Copy(device_tensor, host_tensor, stream));
+      prof_region_end("copy-to-device", false);
     }
 
-    prof_region_begin("copy-to-device", prof_colors[1], false);
-    assert0(dc::tensor::Copy(
-        device_tensor, host_tensor, stream));
-    prof_region_end("copy-to-device", false);
   }
+
 }
 
 template <typename TensorDataType,
@@ -486,6 +493,16 @@ keep_original_outputs(int index) const {
   // into distconv tensors.
   return true;
 }
+
+template <typename TensorDataType,
+          data_layout T_layout,
+          El::Device Dev>
+bool input_layer<TensorDataType, T_layout, Dev>::
+keep_original_gradient_wrt_outputs(int index) const {
+  // Error signals are ignored
+  return false;
+}
+
 #endif // LBANN_HAS_DISTCONV
 
 #define PROTO_DEVICE(T, Device) \
diff --git a/src/proto/proto_common.cpp b/src/proto/proto_common.cpp
index 5c90e04e56b..2780d9e9147 100644
--- a/src/proto/proto_common.cpp
+++ b/src/proto/proto_common.cpp
@@ -213,6 +213,7 @@ void init_data_readers(
                                                           key_labels,
                                                           key_responses,
                                                           hyperslab_labels);
+      reader_hdf5->set_has_data_field(INPUT_DATA_TYPE_SAMPLES, true);
       reader_hdf5->set_has_labels(!readme.disable_labels());
       reader_hdf5->set_has_responses(!readme.disable_responses());
       reader_hdf5->set_num_responses(readme.num_responses());

From ae0ad43538a51df567ee958ae712bff0fd5a04c7 Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Thu, 4 Nov 2021 17:19:39 -0700
Subject: [PATCH 31/37] Fix compile errors with distconv (#2000)

---
 src/layers/data_type_distconv_adapter.cpp | 2 +-
 src/layers/io/input_layer.cpp             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/layers/data_type_distconv_adapter.cpp b/src/layers/data_type_distconv_adapter.cpp
index c132cba655b..e7119cb07cd 100644
--- a/src/layers/data_type_distconv_adapter.cpp
+++ b/src/layers/data_type_distconv_adapter.cpp
@@ -718,7 +718,7 @@ dc::TensorShuffler<TensorDataType> &get_shuffler(
     const dc::TensorDev<TensorDataType> &src,
     const dc::TensorDev<TensorDataType> &dst,
     const size_t max_mini_batch_size) {
-  const auto& c = static_cast<sgd_execution_context&>(
+  const auto& c = static_cast<SGDExecutionContext&>(
       layer.get_model()->get_execution_context());
   const auto& mini_batch_size = c.get_current_mini_batch_size();
   int shuffler_idx = -1;
diff --git a/src/layers/io/input_layer.cpp b/src/layers/io/input_layer.cpp
index 14f935ecb14..24e14eaa84d 100644
--- a/src/layers/io/input_layer.cpp
+++ b/src/layers/io/input_layer.cpp
@@ -413,7 +413,7 @@ void input_distconv_adapter<TensorDataType, T_layout, Dev>::fp_compute() {
   // Note that the mini-batch size of the data reader is not
   // actually the one for the current mini-batch as the mini-batch
   // index is already updated by fp_compute.
-  const int mb_size = static_cast<sgd_execution_context&>(
+  const int mb_size = static_cast<SGDExecutionContext&>(
       l.get_model()->get_execution_context()).get_current_mini_batch_size();
 
   if (m_is_input_processed) {

From 7e0b4affddb5bf2f3c45a0f8a6b2e4ce60eb8960 Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Thu, 11 Nov 2021 10:53:51 -0800
Subject: [PATCH 32/37] Optimize GPU entry-wise operator functions with
 contiguous data (#2005)

---
 include/lbann/utils/impl/gpu_lib.hpp | 220 +++++++++++++++++++--------
 src/layers/activations/relu.cu       |   8 +-
 2 files changed, 159 insertions(+), 69 deletions(-)

diff --git a/include/lbann/utils/impl/gpu_lib.hpp b/include/lbann/utils/impl/gpu_lib.hpp
index 1ff5eea85a2..4b90177277e 100644
--- a/include/lbann/utils/impl/gpu_lib.hpp
+++ b/include/lbann/utils/impl/gpu_lib.hpp
@@ -168,52 +168,112 @@ const T& array<T,N>::operator[](size_t i) const {
 // -------------------------------------------------------------
 #if defined __CUDACC__ || defined __HIPCC__
 
-/** GPU kernel to apply an entry-wise unary operator. */
+namespace apply_entrywise_operator_impl {
+
+/** @brief Apply entry-wise unary operator to 1D data
+ *
+ *  Block dims: bsize x 1 x 1
+ *
+ *  Grid dims: (size/bsize) x 1 x 1
+ */
+template <template <typename> class UnaryOperator, typename TensorDataType>
+__global__
+void unary_1d_kernel(
+  size_t size,
+  const TensorDataType* __restrict__ input,
+  TensorDataType* __restrict__ output) {
+  const size_t gid = threadIdx.x + blockIdx.x * blockDim.x;
+  const size_t nthreads = blockDim.x * gridDim.x;
+  UnaryOperator<TensorDataType> op;
+  for (size_t i = gid; i < size; i += nthreads) {
+    output[i] = op(input[i]);
+  }
+}
+
+/** @brief Apply entry-wise unary operator to 2D data
+ *
+ *  Block dims: bsizex x bsizey x 1
+ *
+ *  Grid dims: (height/bsizex) x (width/bsizey) x 1
+ */
 template <template <typename> class UnaryOperator, typename TensorDataType>
 __global__
-void entrywise_unary_operator_kernel(El::Int height, El::Int width,
-                                     const TensorDataType* __restrict__ input,
-                                     El::Int input_ldim,
-                                     TensorDataType* __restrict__ output,
-                                     El::Int output_ldim) {
-  const El::Int gid = threadIdx.x + blockIdx.x * blockDim.x;
-  const El::Int size = height * width;
-  const El::Int num_threads = blockDim.x * gridDim.x;
+void unary_2d_kernel(
+  size_t height, size_t width,
+  const TensorDataType* __restrict__ input,
+  size_t input_ldim,
+  TensorDataType* __restrict__ output,
+  size_t output_ldim) {
+  const size_t gidx = threadIdx.x + blockIdx.x * blockDim.x;
+  const size_t gidy = threadIdx.y + blockIdx.y * blockDim.y;
+  const size_t nthreadsx = blockDim.x * gridDim.x;
+  const size_t nthreadsy = blockDim.y * gridDim.y;
   UnaryOperator<TensorDataType> op;
-  for (El::Int pos = gid; pos < size; pos += num_threads) {
-    const auto& row = pos % height;
-    const auto& col = pos / height;
-    const auto& x = input[row + col * input_ldim];
-    auto& y = output[row + col * output_ldim];
-    y = op(x);
+  for (size_t j=gidy; j<width; j+=nthreadsy) {
+    for (size_t i=gidx; i<height; i+=nthreadsx) {
+      const auto& x = input[i + j*input_ldim];
+      auto& y = output[i + j*output_ldim];
+      y = op(x);
+    }
   }
 }
 
-/** GPU kernel to apply an entry-wise binary operator. */
+/** @brief Apply entry-wise binary operator to 1D data
+ *
+ *  Block dims: bsize x 1 x 1
+ *
+ *  Grid dims: (size/bsize) x 1 x 1
+ */
 template <template <typename> class BinaryOperator, typename TensorDataType>
 __global__
-void entrywise_binary_operator_kernel(El::Int height, El::Int width,
-                                     const TensorDataType* __restrict__ input1,
-                                     El::Int input1_ldim,
-                                     const TensorDataType* __restrict__ input2,
-                                     El::Int input2_ldim,
-                                     TensorDataType* __restrict__ output,
-                                     El::Int output_ldim) {
-  const El::Int gid = threadIdx.x + blockIdx.x * blockDim.x;
-  const El::Int size = height * width;
-  const El::Int num_threads = blockDim.x * gridDim.x;
+void binary_1d_kernel(
+  size_t size,
+  const TensorDataType* __restrict__ input1,
+  const TensorDataType* __restrict__ input2,
+  TensorDataType* __restrict__ output) {
+  const size_t gid = threadIdx.x + blockIdx.x * blockDim.x;
+  const size_t nthreads = blockDim.x * gridDim.x;
   BinaryOperator<TensorDataType> op;
-  for (El::Int pos = gid; pos < size; pos += num_threads) {
-    const auto& row = pos % height;
-    const auto& col = pos / height;
-    const auto& x1 = input1[row + col * input1_ldim];
-    const auto& x2 = input2[row + col * input2_ldim];
-    auto& y = output[row + col * output_ldim];
-    y = op(x1, x2);
+  for (size_t i = gid; i < size; i += nthreads) {
+    output[i] = op(input1[i], input2[i]);
   }
 }
 
-/** Apply an entry-wise unary operator to GPU data.
+/** @brief Apply entry-wise binary operator to 2D data
+ *
+ *  Block dims: bsizex x bsizey x 1
+ *
+ *  Grid dims: (height/bsizex) x (width/bsizey) x 1
+ */
+template <template <typename> class BinaryOperator, typename TensorDataType>
+__global__
+void binary_2d_kernel(
+  size_t height, size_t width,
+  const TensorDataType* __restrict__ input1,
+  size_t input1_ldim,
+  const TensorDataType* __restrict__ input2,
+  size_t input2_ldim,
+  TensorDataType* __restrict__ output,
+  size_t output_ldim) {
+  const size_t gidx = threadIdx.x + blockIdx.x * blockDim.x;
+  const size_t gidy = threadIdx.y + blockIdx.y * blockDim.y;
+  const size_t nthreadsx = blockDim.x * gridDim.x;
+  const size_t nthreadsy = blockDim.y * gridDim.y;
+  BinaryOperator<TensorDataType> op;
+  for (size_t j=gidy; j<width; j+=nthreadsy) {
+    for (size_t i=gidx; i<height; i+=nthreadsx) {
+      const auto& x1 = input1[i + j*input1_ldim];
+      const auto& x2 = input2[i + j*input2_ldim];
+      auto& y = output[i + j*output_ldim];
+      y = op(x1, x2);
+    }
+  }
+}
+
+} // namespace apply_entrywise_operator_impl
+
+/** @brief Apply an entry-wise unary operator to GPU data.
+ *
  *  The input and output data must be on GPU and must have the same
  *  dimensions.
  */
@@ -225,42 +285,57 @@ void apply_entrywise_unary_operator(
   // Check that input and output are valid
   if (input.GetDevice() != El::Device::GPU) {
     LBANN_ERROR("input is not on GPU");
-  } else if (output.GetDevice() != El::Device::GPU) {
+  }
+  else if (output.GetDevice() != El::Device::GPU) {
     LBANN_ERROR("output is not on GPU");
-  } else if (input.Height() != output.Height()
-             || input.Width() != output.Width()) {
+  }
+  else if (input.Height() != output.Height()
+           || input.Width() != output.Width()) {
     LBANN_ERROR("input matrix dimensions "
                 "(", input.Height(), " x ", input.Width(), ")"
                 "don't match output matrix dimensions "
                 "(", output.Height(), " x ", output.Width(), ")");
   }
 
-  // Get GPU grid dimensions
-  // Note: Maximum CUDA grid dimension is 2^32-1
-  // (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications).
-  const El::Int height = input.Height();
-  const El::Int width = input.Width();
-  const El::Int block_dim = 256;
-  El::Int grid_dim = (height * width + block_dim - 1) / block_dim;
-  if (sizeof(El::Int) > sizeof(unsigned int)
-      && grid_dim > std::numeric_limits<uint32_t>::max()) {
-    grid_dim = std::numeric_limits<uint32_t>::max();
+  // Return immediately if no compute is required
+  if (output.IsEmpty()) {
+    return;
   }
 
   // Launch GPU kernel
-  if (grid_dim > 0) {
+  if (input.Contiguous() && output.Contiguous()) {
+    dim3 block_dims, grid_dims;
+    block_dims.x = 256;
+    grid_dims.x = (output.Height()*output.Width() + block_dims.x - 1) / block_dims.x;
+    gpu_lib::clip_grid_dims(grid_dims);
+    auto multisync = El::MakeMultiSync(gpu::get_sync_info(output),
+                                       gpu::get_sync_info(input));
+    hydrogen::gpu::LaunchKernel(
+      apply_entrywise_operator_impl::unary_1d_kernel<UnaryOp, TensorDataType>,
+      grid_dims, block_dims, 0, multisync,
+      output.Height()*output.Width(), input.LockedBuffer(), output.Buffer());
+  }
+  else {
+    dim3 block_dims, grid_dims;
+    block_dims.x = 256;
+    block_dims.y = 256;
+    grid_dims.x = (output.Height() + block_dims.x - 1) / block_dims.x;
+    grid_dims.y = (output.Width() + block_dims.y - 1) / block_dims.y;
+    gpu_lib::clip_grid_dims(grid_dims);
     auto multisync = El::MakeMultiSync(gpu::get_sync_info(output),
                                        gpu::get_sync_info(input));
     hydrogen::gpu::LaunchKernel(
-      entrywise_unary_operator_kernel<UnaryOp, TensorDataType>,
-      grid_dim, block_dim, 0, multisync,
-      height, width, input.LockedBuffer(), input.LDim(),
+      apply_entrywise_operator_impl::unary_2d_kernel<UnaryOp, TensorDataType>,
+      grid_dims, block_dims, 0, multisync,
+      input.Height(), input.Width(),
+      input.LockedBuffer(), input.LDim(),
       output.Buffer(), output.LDim());
   }
 
 }
 
-/** Apply an entry-wise binary operator to GPU data.
+/** @brief Apply an entry-wise binary operator to GPU data.
+ *
  *  The input and output data must be on GPU and must have the same
  *  dimensions.
  */
@@ -287,27 +362,40 @@ void apply_entrywise_binary_operator(
                 "(", output.Height(), " x ", output.Width(), ")");
   }
 
-  // Get GPU grid dimensions
-  // Note: Maximum CUDA grid dimension is 2^32-1
-  // (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications).
-  const El::Int height = input1.Height();
-  const El::Int width = input1.Width();
-  const El::Int block_dim = 256;
-  El::Int grid_dim = (height * width + block_dim - 1) / block_dim;
-  if (sizeof(El::Int) > sizeof(unsigned int)
-      && grid_dim > std::numeric_limits<uint32_t>::max()) {
-    grid_dim = std::numeric_limits<uint32_t>::max();
+  // Return immediately if no compute is required
+  if (output.IsEmpty()) {
+    return;
   }
 
   // Launch GPU kernel
-  if (grid_dim > 0) {
+  if (input1.Contiguous() && input2.Contiguous() && output.Contiguous()) {
+    dim3 block_dims, grid_dims;
+    block_dims.x = 256;
+    grid_dims.x = (output.Height()*output.Width() + block_dims.x - 1) / block_dims.x;
+    gpu_lib::clip_grid_dims(grid_dims);
+    auto multisync = El::MakeMultiSync(gpu::get_sync_info(output),
+                                       gpu::get_sync_info(input1),
+                                       gpu::get_sync_info(input2));
+    hydrogen::gpu::LaunchKernel(
+      apply_entrywise_operator_impl::binary_1d_kernel<BinaryOp, TensorDataType>,
+      grid_dims, block_dims, 0, multisync,
+      output.Height()*output.Width(),
+      input1.LockedBuffer(), input2.LockedBuffer(), output.Buffer());
+  }
+  else {
+    dim3 block_dims, grid_dims;
+    block_dims.x = 256;
+    block_dims.y = 256;
+    grid_dims.x = (output.Height() + block_dims.x - 1) / block_dims.x;
+    grid_dims.y = (output.Width() + block_dims.y - 1) / block_dims.y;
+    gpu_lib::clip_grid_dims(grid_dims);
     auto multisync = El::MakeMultiSync(gpu::get_sync_info(output),
                                        gpu::get_sync_info(input1),
                                        gpu::get_sync_info(input2));
     hydrogen::gpu::LaunchKernel(
-      entrywise_binary_operator_kernel<BinaryOp, TensorDataType>,
-      grid_dim, block_dim, 0, multisync,
-      height, width,
+      apply_entrywise_operator_impl::binary_2d_kernel<BinaryOp, TensorDataType>,
+      grid_dims, block_dims, 0, multisync,
+      output.Height(), output.Width(),
       input1.LockedBuffer(), input1.LDim(),
       input2.LockedBuffer(), input2.LDim(),
       output.Buffer(), output.LDim());
diff --git a/src/layers/activations/relu.cu b/src/layers/activations/relu.cu
index 79244ca366b..a04dc2b6808 100644
--- a/src/layers/activations/relu.cu
+++ b/src/layers/activations/relu.cu
@@ -35,8 +35,8 @@ namespace {
 /** Entry-wise operator. */
 template <typename TensorDataType>
 struct op {
-  inline __device__ TensorDataType operator()(TensorDataType x) const {
-    return x > TensorDataType{0.f} ? x : TensorDataType{0.f};
+  inline __device__ TensorDataType operator()(const TensorDataType& x) const {
+    return gpu_lib::max(x, TensorDataType{0.f});
   }
 };
 
@@ -47,7 +47,9 @@ struct op {
  */
 template <typename TensorDataType>
 struct op_backprop {
-  inline __device__ TensorDataType operator()(TensorDataType x, TensorDataType dy) const {
+  inline __device__ TensorDataType operator()(
+    const TensorDataType& x,
+    const TensorDataType& dy) const {
     return x > TensorDataType{0.f} ? dy : TensorDataType{0.f};
   }
 };

From 07940bacbbe01e663845414a253566df0dc991ae Mon Sep 17 00:00:00 2001
From: Tim Moon <moon13@llnl.gov>
Date: Thu, 11 Nov 2021 15:41:52 -0800
Subject: [PATCH 33/37] Fix grid dimensions in GPU entry-wise operator
 functions (#2006)

---
 include/lbann/utils/impl/gpu_lib.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/lbann/utils/impl/gpu_lib.hpp b/include/lbann/utils/impl/gpu_lib.hpp
index 4b90177277e..9cd41641e19 100644
--- a/include/lbann/utils/impl/gpu_lib.hpp
+++ b/include/lbann/utils/impl/gpu_lib.hpp
@@ -318,7 +318,7 @@ void apply_entrywise_unary_operator(
   else {
     dim3 block_dims, grid_dims;
     block_dims.x = 256;
-    block_dims.y = 256;
+    block_dims.y = 1;
     grid_dims.x = (output.Height() + block_dims.x - 1) / block_dims.x;
     grid_dims.y = (output.Width() + block_dims.y - 1) / block_dims.y;
     gpu_lib::clip_grid_dims(grid_dims);
@@ -385,7 +385,7 @@ void apply_entrywise_binary_operator(
   else {
     dim3 block_dims, grid_dims;
     block_dims.x = 256;
-    block_dims.y = 256;
+    block_dims.y = 1;
     grid_dims.x = (output.Height() + block_dims.x - 1) / block_dims.x;
     grid_dims.y = (output.Width() + block_dims.y - 1) / block_dims.y;
     gpu_lib::clip_grid_dims(grid_dims);

From 5ef2866de726698eb2b2db0c4416b754225ba22e Mon Sep 17 00:00:00 2001
From: "Brian C. Van Essen" <vanessen1@llnl.gov>
Date: Wed, 10 Nov 2021 10:19:36 -0800
Subject: [PATCH 34/37] Added an integration test for the JAG MACC WAE model to
 pre-train the WAE.  Moved the MACC model definitions to MACC network
 architecture definitions.  Created a MACC trainable model function to allow
 the JAG application to have a single definition of the trainable WAE model
 that is suitable for both application and testing use cases.

---
 ...odels.py => macc_network_architectures.py} |  45 +++--
 .../physics/ICF/macc_trainable_models.py      |  81 ++++++++
 applications/physics/ICF/pre_train_jag_wae.py | 108 ++--------
 .../physics/data/jag_conduit_reader.prototext |  17 +-
 .../test_integration_pre_train_jag_wae.py     | 188 ++++++++++++++++++
 python/lbann/util/__init__.py                 |   3 +
 6 files changed, 313 insertions(+), 129 deletions(-)
 rename applications/physics/ICF/{macc_models.py => macc_network_architectures.py} (95%)
 create mode 100644 applications/physics/ICF/macc_trainable_models.py
 create mode 100644 bamboo/integration_tests/test_integration_pre_train_jag_wae.py

diff --git a/applications/physics/ICF/macc_models.py b/applications/physics/ICF/macc_network_architectures.py
similarity index 95%
rename from applications/physics/ICF/macc_models.py
rename to applications/physics/ICF/macc_network_architectures.py
index 440ba2fa746..3acdb616dbf 100644
--- a/applications/physics/ICF/macc_models.py
+++ b/applications/physics/ICF/macc_network_architectures.py
@@ -1,5 +1,6 @@
 import lbann
 import lbann.modules.base
+from lbann.util import str_list, list2str
 
 
 #Synonymous to fc_gen0
@@ -14,17 +15,17 @@ def __init__(self, out_dim,cf=1,name=None):
                      else 'macc_forward{0}'.format(MACCForward.global_count))
 
        fc = lbann.modules.FullyConnectedModule
-       
+
        assert isinstance(cf, int), 'model capacity factor should be an int!'
        #generator #fc2_gen0
        g_neurons = [x*cf for x in [32,256,1024]]
        self.gen_fc = [fc(g_neurons[i],activation=lbann.Relu, name=self.name+'gen_fc'+str(i))
                       for i in range(len(g_neurons))]
        self.predy = fc(out_dim,name=self.name+'pred_out')
-      
+
     def forward(self,x):
         return self.predy(self.gen_fc[2](self.gen_fc[1](self.gen_fc[0](x))))
- 
+
 #Synonymous to fc_gen1
 class MACCInverse(lbann.modules.Module):
 
@@ -36,7 +37,7 @@ def __init__(self, out_dim,cf=1,name=None):
                      else 'macc_inverse{0}'.format(MACCInverse.global_count))
 
        fc = lbann.modules.FullyConnectedModule
-       
+
        assert isinstance(cf, int), 'model capacity factor should be an int!'
        #generator #fc_gen1
        g_neurons = [x*cf for x in [16,128,64]]
@@ -51,7 +52,7 @@ def forward(self,y):
 class MACCWAE(lbann.modules.Module):
 
     global_count = 0  # Static counter, used for default names
-    #model capacity factor (cf) 
+    #model capacity factor (cf)
     def __init__(self, encoder_out_dim, decoder_out_dim, scalar_dim = 15, cf=1, use_CNN=False, name=None):
        self.instance = 0
        self.name = (name if name
@@ -68,63 +69,63 @@ def __init__(self, encoder_out_dim, decoder_out_dim, scalar_dim = 15, cf=1, use_
        encoder_neurons = [x*cf for x in [32,256,128]]
        decoder_neurons = [x*cf for x in [64,128,256]]
        #Enc/Dec sizes  [32, 256, 128]   [64, 128, 256]
-       print("CF, Enc/Dec sizes ", cf, " ", encoder_neurons, " ", decoder_neurons) 
+       print("CF, Enc/Dec sizes ", cf, " ", encoder_neurons, " ", decoder_neurons)
        enc_outc = [64,32,16]
        dec_outc = [32,16,4]
-       
+
        #Encoder
        self.enc_fc0 = fc(encoder_neurons[0],activation=lbann.Elu,name=self.name+'_enc_fc0')
        self.enc_fc1 = fc(encoder_neurons[1],activation=lbann.Tanh,name=self.name+'_enc_fc1')
        self.enc_fc2 = fc(encoder_neurons[2],activation=lbann.Tanh,name=self.name+'_enc_fc2')
        self.enc_out = fc(encoder_out_dim,name=self.name+'enc_out')
-     
+
        #Decoder
        self.dec_fc0 = fc(decoder_neurons[0],activation=lbann.Elu,name=self.name+'_dec_fc0')
        self.dec_fc1 = fc(decoder_neurons[1],activation=lbann.Tanh,name=self.name+'_dec_fc1')
        self.dec_fc2 = fc(decoder_neurons[2],activation=lbann.Tanh,name=self.name+'_dec_fc2')
        self.dec_out = fc(decoder_out_dim,name=self.name+'pred_y')
-       
+
        #Discriminator1
        self.d0_fc0 = fc(disc_neurons[0],activation=lbann.Relu,name=self.name+'_disc0_fc0')
        self.d0_fc1 = fc(disc_neurons[1],activation=lbann.Relu,name=self.name+'_disc0_fc1')
        self.d0_fc2 = fc(disc_neurons[2],name=self.name+'_disc0_fc2')
 
        #Discriminator2
-       #stacked_discriminator, this will be frozen, no optimizer, 
-       #layer has to be named for replace layer callback 
+       #stacked_discriminator, this will be frozen, no optimizer,
+       #layer has to be named for replace layer callback
        self.d1_fc0 = fc(disc_neurons[0],activation=lbann.Relu,name=self.name+'_disc1_fc0')
        self.d1_fc1 = fc(disc_neurons[1],activation=lbann.Relu,name=self.name+'_disc1_fc1')
        self.d1_fc2 = fc(disc_neurons[2],name=self.name+'_disc1_fc2')
 
        #Encoder_CNN
        self.enc_conv = [conv(enc_outc[i], 4, stride=2, padding=1, activation=lbann.Relu,
-                        name=self.name+'_enc_conv'+str(i)) for i in range(len(enc_outc))] 
+                        name=self.name+'_enc_conv'+str(i)) for i in range(len(enc_outc))]
 
-       #Decoder_CNN 
+       #Decoder_CNN
        #Arxiv paper/PNAS configuration is D1: Dense(32,1024)
        self.dec_cnn_fc = fc(16*8*8,activation=lbann.Relu,name=self.name+'_dec_cnn_fc')
        self.dec_fc_sca = fc(scalar_dim, name=self.name+'_dec_sca_fc')
        self.dec_convT = [conv(dec_outc[i], 4, stride=2, padding=1,
                         transpose=True, name=self.name+'_dec_conv'+str(i))
                         for i in range(len(dec_outc))]
- 
+
     def forward(self, z, y):
-         
+
         z_sample = self.encoder(y)
 
         y_recon = self.decoder(z_sample)
 
-        #d real/fake share weights, shared weights is copied to d_adv 
+        #d real/fake share weights, shared weights is copied to d_adv
         #(through replace weight callback) and freeze
-        d_real = self.discriminator0(lbann.Concatenation([y,z],axis=0))  
+        d_real = self.discriminator0(lbann.Concatenation([y,z],axis=0))
         y_z_sample = lbann.Concatenation([y,z_sample],axis=0)
-        d_fake = self.discriminator0(lbann.StopGradient(y_z_sample)) 
+        d_fake = self.discriminator0(lbann.StopGradient(y_z_sample))
         d_adv = self.discriminator1(y_z_sample) #freeze
 
         return d_real, d_fake, d_adv,y_recon
 
     def encoder(self, y):
-        return self.encoder_cnn(y) if self.use_CNN else self.encoder_fc(y) 
+        return self.encoder_cnn(y) if self.use_CNN else self.encoder_fc(y)
 
     def encoder_fc(self,y):
         return self.enc_out(self.enc_fc2(self.enc_fc1(self.enc_fc0(y))))
@@ -140,11 +141,11 @@ def encoder_cnn(self,y):
         return z
 
     def decoder(self, z):
-        return self.decoder_cnn(z) if self.use_CNN else self.decoder_fc(z) 
+        return self.decoder_cnn(z) if self.use_CNN else self.decoder_fc(z)
 
     def decoder_fc(self,z):
         return self.dec_out(self.dec_fc2(self.dec_fc1(self.dec_fc0(z))))
-   
+
     def decoder_cnn(self,z):
         x = self.dec_cnn_fc(z)
         sca = self.dec_fc_sca(lbann.Identity(x))
@@ -157,6 +158,6 @@ def decoder_cnn(self,z):
 
     def discriminator0(self,input):
         return self.d0_fc2(self.d0_fc1(self.d0_fc0(input)))
-        
+
     def discriminator1(self,input):
         return self.d1_fc2(self.d1_fc1(self.d1_fc0(input)))
diff --git a/applications/physics/ICF/macc_trainable_models.py b/applications/physics/ICF/macc_trainable_models.py
new file mode 100644
index 00000000000..10ef03ba511
--- /dev/null
+++ b/applications/physics/ICF/macc_trainable_models.py
@@ -0,0 +1,81 @@
+import lbann
+import macc_network_architectures
+from lbann.util import str_list, list2str
+
+def construct_jag_wae_model(ydim,
+                            zdim,
+                            mcf,
+                            useCNN,
+                            dump_models,
+                            ltfb_batch_interval,
+                            num_epochs
+                            ):
+    """Construct the trainable JAG Wasserstein autoencoder (WAE) model.
+
+    ydim is the image+scalar width sliced from each sample and zdim the
+    latent width; mcf and useCNN are forwarded to MACCWAE. dump_models,
+    ltfb_batch_interval and num_epochs configure callbacks and training.
+    """
+    # Layer graph
+    inp = lbann.Input(data_field='samples', name='inp_data')
+    # data is 64*64*4 images + 15 scalar + 5 param
+    # With the default ydim=16399 the slice points are "0 16399 16404"
+    inp_slice = lbann.Slice(inp, axis=0, slice_points=str_list([0,ydim,ydim+5]),name='inp_slice')
+    gt_y = lbann.Identity(inp_slice,name='gt_y')
+    gt_x = lbann.Identity(inp_slice, name='gt_x') #param not used
+
+    zero  = lbann.Constant(value=0.0,num_neurons='1',name='zero')
+    one  = lbann.Constant(value=1.0,num_neurons='1',name='one')
+
+    # The prior sample must be as wide as the encoder output (zdim): both
+    # are concatenated with gt_y and fed to the weight-sharing discriminator
+    z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims=str(zdim))
+    model = macc_network_architectures.MACCWAE(zdim,ydim,cf=mcf,use_CNN=useCNN)
+    d1_real, d1_fake, d_adv, pred_y  = model(z,gt_y)
+
+    d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real,one],name='d1_real_bce')
+    d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake,zero],name='d1_fake_bce')
+    d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv,one],name='d_adv_bce')
+    img_loss = lbann.MeanSquaredError([pred_y,gt_y])
+    rec_error = lbann.L2Norm2(lbann.WeightedSum([pred_y,gt_y], scaling_factors="1 -1"))
+
+    layers = list(lbann.traverse_layer_graph(inp))
+    # Setup objective function
+    weights = set()
+    src_layers = []
+    dst_layers = []
+    for layer in layers:
+      if layer.weights and "disc0" in layer.name and "instance1" in layer.name:
+        src_layers.append(layer.name)
+      # Freeze the second discriminator (disc1): it only receives copied weights
+      if layer.weights and "disc1" in layer.name:
+        dst_layers.append(layer.name)
+        for w in layer.weights:
+          w.optimizer = lbann.NoOptimizer()
+      weights.update(layer.weights)
+    l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)
+    d_adv_bce = lbann.LayerTerm(d_adv_bce,scale=0.01)
+    obj = lbann.ObjectiveFunction([d1_real_bce,d1_fake_bce,d_adv_bce,img_loss,rec_error,l2_reg])
+    # Reconstruction metric; LTFB (if enabled) ranks trainers by it
+    metrics = [lbann.Metric(img_loss, name='recon_error')]
+    # Every 2 batches, copy disc0 weights into the frozen disc1 layers
+    callbacks = [lbann.CallbackPrint(),
+                 lbann.CallbackTimer(),
+                 lbann.CallbackPrintModelDescription(),
+                 lbann.CallbackSaveModel(dir=dump_models),
+                 lbann.CallbackReplaceWeights(source_layers=list2str(src_layers),
+                                      destination_layers=list2str(dst_layers),
+                                      batch_interval=2)]
+
+    if ltfb_batch_interval > 0:
+      callbacks.append(lbann.CallbackLTFB(batch_interval=ltfb_batch_interval,metric='recon_error',
+                                    low_score_wins=True,
+                                    exchange_hyperparameters=True))
+
+    # Construct model
+    return lbann.Model(num_epochs,
+                       weights=weights,
+                       layers=layers,
+                       metrics=metrics,
+                       objective_function=obj,
+                       callbacks=callbacks)
diff --git a/applications/physics/ICF/pre_train_jag_wae.py b/applications/physics/ICF/pre_train_jag_wae.py
index ceb0bb8b630..1cb424d69cc 100644
--- a/applications/physics/ICF/pre_train_jag_wae.py
+++ b/applications/physics/ICF/pre_train_jag_wae.py
@@ -1,10 +1,9 @@
-import macc_models
+import macc_trainable_models
 import argparse
 from os.path import abspath, dirname, join
 import google.protobuf.text_format as txtf
 import lbann.contrib.launcher
 import lbann.contrib.args
-from lbann.util import str_list
 
 # ==============================================
 # Setup and launch experiment
@@ -50,16 +49,10 @@
     '--useCNN', action='store', default=False, type=bool,
     help='use CNN', metavar='BOOL')
 parser.add_argument(
-    '--data-filedir-train', action='store', default='/p/gpfs1/brainusr/datasets/10MJAG/1M_A/', type=str,
-    help='data filedir (default train dir is 10MJAG/1M_A)', metavar='NAME')
-parser.add_argument(
-    '--data-filedir-test', action='store', default='/p/gpfs1/brainusr/datasets/10MJAG/1M_B/', type=str,
-    help='data filedir (default test dir is 10MJAG/1M_B)', metavar='NAME')
-parser.add_argument(
-    '--index-list-train', action='store', default='index.txt', type=str,
+    '--sample-list-train', action='store', default='/p/vast1/lbann/datasets/JAG/10MJAG/1M_A/index.txt', type=str,
     help='index list (default index.txt)', metavar='NAME')
 parser.add_argument(
-    '--index-list-test', action='store', default='t0_sample_list_multi_10K.txt', type=str,
+    '--sample-list-test', action='store', default='/p/vast1/lbann/datasets/JAG/10MJAG/1M_B/t0_sample_list_multi_10K.txt', type=str,
     help='index list (default t0_sample_list_multi_10K.txt, 100 samples)', metavar='NAME')
 parser.add_argument(
     '--dump-outputs', action='store', default='dump_outs', type=str,
@@ -75,88 +68,12 @@
     help='LTFB batch interval (default: 0, no LTFB)', metavar='NUM')
 args = parser.parse_args()
 
-
-def list2str(l):
-    return ' '.join(l)
-
-def construct_model():
-    """Construct LBANN model.
-
-    JAG Wasserstein autoencoder  model
-
-    """
-    import lbann
-
-    # Layer graph
-    input = lbann.Input(data_field='samples', name='inp_data')
-    # data is 64*64*4 images + 15 scalar + 5 param
-    #inp_slice = lbann.Slice(input, axis=0, slice_points="0 16399 16404",name='inp_slice')
-    inp_slice = lbann.Slice(input, axis=0, slice_points=str_list([0,args.ydim,args.ydim+5]),name='inp_slice')
-    gt_y = lbann.Identity(inp_slice,name='gt_y')
-    gt_x = lbann.Identity(inp_slice, name='gt_x') #param not used
-
-    zero  = lbann.Constant(value=0.0,num_neurons='1',name='zero')
-    one  = lbann.Constant(value=1.0,num_neurons='1',name='one')
-
-    z_dim = 20  #Latent space dim
-
-    z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="20")
-    model = macc_models.MACCWAE(args.zdim,args.ydim,cf=args.mcf,use_CNN=args.useCNN)
-    d1_real, d1_fake, d_adv, pred_y  = model(z,gt_y)
-
-    d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real,one],name='d1_real_bce')
-    d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake,zero],name='d1_fake_bce')
-    d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv,one],name='d_adv_bce')
-    img_loss = lbann.MeanSquaredError([pred_y,gt_y])
-    rec_error = lbann.L2Norm2(lbann.WeightedSum([pred_y,gt_y], scaling_factors="1 -1"))
-
-    layers = list(lbann.traverse_layer_graph(input))
-    # Setup objective function
-    weights = set()
-    src_layers = []
-    dst_layers = []
-    for l in layers:
-      if(l.weights and "disc0" in l.name and "instance1" in l.name):
-        src_layers.append(l.name)
-      #freeze weights in disc2
-      if(l.weights and "disc1" in l.name):
-        dst_layers.append(l.name)
-        for idx in range(len(l.weights)):
-          l.weights[idx].optimizer = lbann.NoOptimizer()
-      weights.update(l.weights)
-    l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)
-    d_adv_bce = lbann.LayerTerm(d_adv_bce,scale=0.01)
-    obj = lbann.ObjectiveFunction([d1_real_bce,d1_fake_bce,d_adv_bce,img_loss,rec_error,l2_reg])
-    # Initialize check metric callback
-    metrics = [lbann.Metric(img_loss, name='recon_error')]
-    #pred_y = macc_models.MACCWAE.pred_y_name
-    callbacks = [lbann.CallbackPrint(),
-                 lbann.CallbackTimer(),
-                 lbann.CallbackSaveModel(dir=args.dump_models),
-                 lbann.CallbackReplaceWeights(source_layers=list2str(src_layers),
-                                      destination_layers=list2str(dst_layers),
-                                      batch_interval=2)]
-
-    if(args.ltfb_batch_interval > 0) :
-      callbacks.append(lbann.CallbackLTFB(batch_interval=args.ltfb_batch_interval,metric='recon_error',
-                                    low_score_wins=True,
-                                    exchange_hyperparameters=True))
-
-    # Construct model
-    return lbann.Model(args.num_epochs,
-                       serialize_io=True,
-                       weights=weights,
-                       layers=layers,
-                       metrics=metrics,
-                       objective_function=obj,
-                       callbacks=callbacks)
-
-
 if __name__ == '__main__':
     import lbann
 
-    trainer = lbann.Trainer(mini_batch_size=args.mini_batch_size)
-    model = construct_model()
+    trainer = lbann.Trainer(mini_batch_size=args.mini_batch_size,
+                            serialize_io=True)
+    model = macc_trainable_models.construct_jag_wae_model(args.ydim, args.zdim, args.mcf, args.useCNN, args.dump_models, args.ltfb_batch_interval, args.num_epochs)
     # Setup optimizer
     opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
     # Load data reader from prototext
@@ -168,16 +85,15 @@ def construct_model():
     kwargs = lbann.contrib.args.get_scheduler_kwargs(args)
     status = lbann.contrib.launcher.run(trainer,model, data_reader_proto, opt,
                        nodes=args.num_nodes,
-                       procs_per_node=args.ppn,
+#                       procs_per_node=args.ppn,
                        time_limit=720,
-                       setup_only=True,
+#                       setup_only=True,
                        job_name=args.job_name,
                        lbann_args=['--use_data_store --preload_data_store',
                                    f'--metadata={metadata_prototext}',
-                                   f'--index_list_train={args.index_list_train}',
-                                   f'--index_list_test={args.index_list_test}',
-                                   f'--data_filedir_train={args.data_filedir_train}',
-                                   f'--data_filedir_test={args.data_filedir_test}',
-                                   f'--procs_per_trainer={run_args.procs_per_trainer}'],
+                                   f'--data_reader_percent=0.01',
+                                   f'--sample_list_train={args.sample_list_train}',
+                                   f'--sample_list_test={args.sample_list_test}',
+                                   f'--procs_per_trainer={args.procs_per_trainer}'],
                                    **kwargs)
     print(status)
diff --git a/applications/physics/data/jag_conduit_reader.prototext b/applications/physics/data/jag_conduit_reader.prototext
index 97351f8af65..010814d6961 100644
--- a/applications/physics/data/jag_conduit_reader.prototext
+++ b/applications/physics/data/jag_conduit_reader.prototext
@@ -14,15 +14,12 @@ data_reader {
     name: "jag_conduit"
     role: "train"
     shuffle: true
-    # change to a lustre path
-    data_filedir: "/p/gpfs1/brainusr/datasets/10MJAG/1M_A/"
-    index_list: "index.txt"
-    index_list_per_trainer: true
-    index_list_per_model: false
+    sample_list: "/p/vast1/lbann/datasets/JAG/10MJAG/1M_A/index.txt"
+    sample_list_per_trainer: true
+    sample_list_per_model: false
 
     validation_percent: 0.1
     tournament_percent: 0.1
-    absolute_sample_count: 0
     percent_of_data_to_use: 1.0
     disable_responses: true
     disable_labels: true
@@ -34,11 +31,9 @@ data_reader {
     name: "jag_conduit"
     role: "test"
     shuffle: true
-    # change to a lustre path
-    data_filedir: "/p/gpfs1/brainusr/datasets/10MJAG/1M_B"
-    index_list: "t0_sample_list_multi_10K.txt" #100 samples
-    index_list_per_trainer: false
-    index_list_per_model: false
+    sample_list: "/p/vast1/lbann/datasets/JAG/10MJAG/1M_B/t0_sample_list_multi_10K.txt" #100 samples
+    sample_list_per_trainer: false
+    sample_list_per_model: false
 
     validation_percent: 0
     tournament_percent: 0
diff --git a/bamboo/integration_tests/test_integration_pre_train_jag_wae.py b/bamboo/integration_tests/test_integration_pre_train_jag_wae.py
new file mode 100644
index 00000000000..d6755e3acb1
--- /dev/null
+++ b/bamboo/integration_tests/test_integration_pre_train_jag_wae.py
@@ -0,0 +1,188 @@
+import functools
+import operator
+import os
+import os.path
+import re
+import sys
+import numpy as np
+import google.protobuf.text_format
+import pytest
+from os.path import abspath, dirname, join, realpath
+import tools
+
+# Local files
+current_file = realpath(__file__)
+lbann_dir = dirname(os.path.dirname(os.path.dirname(current_file)))
+app_path = join(lbann_dir, 'applications', 'physics','ICF')
+sys.path.append(app_path)
+
+# ==============================================
+# Options
+# ==============================================
+
+# Training options
+num_epochs = 10
+mini_batch_size = 128
+num_nodes = 1
+procs_per_node = 2 # Only use 2 GPUs to ensure comparable testing between lassen and pascal
+                   # this model is very sensitive to differences in how it is initialized
+                   # and parallelized
+
+model_zoo_dir = dirname(app_path)
+data_reader_prototext = join(model_zoo_dir,
+                             'data',
+                             'jag_conduit_reader.prototext')
+metadata_prototext = join(model_zoo_dir,
+                             'data',
+                             'jag_100M_metadata.prototext')
+
+ydim = 16399 # image+scalar dim (default: 64*64*4+15=16399)
+zdim = 20 # latent space dim (default: 20)
+mcf = 1 # model capacity factor (default: 1)
+useCNN = False
+
+# Reconstruction loss
+expected_train_pc_range = (19.9, 20.0)
+expected_test_pc_range = (19.1, 19.2)
+
+# Average mini-batch time (in sec) for each LC system
+# Note that run times are with LBANN_DETERMINISTIC set
+# Commented out times are prior to thread safe RNGs
+expected_mini_batch_times = {
+    'lassen':   0.0530066,
+    'pascal':   0.133671,
+}
+# ==============================================
+# Setup LBANN experiment
+# ==============================================
+
+def make_data_reader(lbann):
+    """Make Protobuf message for JAG data reader.
+
+    """
+    import lbann.contrib.lc.paths
+
+    # Load data readers from prototext
+    message = lbann.lbann_pb2.LbannPB()
+    with open(data_reader_prototext, 'r') as f:
+        google.protobuf.text_format.Merge(f.read(), message)
+    message = message.data_reader
+
+    # Use less training data for the integration test
+    message.reader[0].percent_of_data_to_use = 0.01
+
+    # Set paths
+    return message
+
+def setup_experiment(lbann):
+    """Construct LBANN experiment.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+    if tools.system(lbann) != 'lassen' and tools.system(lbann) != 'pascal':
+      message = f'{os.path.basename(__file__)} is only supported on lassen and pascal systems'
+      print('Skip - ' + message)
+      pytest.skip(message)
+
+    trainer = lbann.Trainer(mini_batch_size=mini_batch_size,
+                            serialize_io=True)
+    import macc_trainable_models
+    dump_models = 'dump_models'
+    ltfb_batch_interval = 0
+    model = macc_trainable_models.construct_jag_wae_model(ydim, zdim, mcf, useCNN, dump_models, ltfb_batch_interval, num_epochs)
+
+    # Setup optimizer
+    opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
+    # Load data reader from prototext
+    data_reader = make_data_reader(lbann)
+
+    return trainer, model, data_reader, opt
+
+# ==============================================
+# Setup PyTest
+# ==============================================
+
+def augment_test_func(test_func):
+    """Augment test function to parse log files.
+
+    `tools.create_tests` creates functions that run an LBANN
+    experiment. This function creates augmented functions that parse
+    the log files after LBANN finishes running, e.g. to check metrics
+    or runtimes.
+
+    Note: The naive approach is to define the augmented test functions
+    in a loop. However, Python closures are late binding. In other
+    words, the function would be overwritten every time we define it.
+    We get around this overwriting problem by defining the augmented
+    function in the local scope of another function.
+
+    Args:
+        test_func (function): Test function created by
+            `tools.create_tests`.
+
+    Returns:
+        function: Test that can interact with PyTest.
+
+    """
+    test_name = test_func.__name__
+
+    # Define test function
+    def func(cluster, dirname,weekly):
+
+        if not weekly:
+            pytest.skip('This app runs {} with weekly builds only'.format(test_name))
+
+        # Run LBANN experiment
+        experiment_output = test_func(cluster, dirname)
+
+        # Parse LBANN log file
+        train_pc = None
+        test_pc = None
+        mini_batch_times = []
+        with open(experiment_output['stdout_log_file']) as f:
+            for line in f:
+                match = re.search('training epoch [0-9]+ recon_error : ([0-9.]+)', line)
+                if match:
+                    train_pc = float(match.group(1))
+                match = re.search('test recon_error : ([0-9.]+)', line)
+                if match:
+                    test_pc = float(match.group(1))
+                match = re.search('training epoch [0-9]+ mini-batch time statistics : ([0-9.]+)s mean', line)
+                if match:
+                    mini_batch_times.append(float(match.group(1)))
+
+        # Check if training reconstruction is within expected range
+        assert (expected_train_pc_range[0]
+                < train_pc
+                < expected_train_pc_range[1]), \
+                'train reconstruction error is outside expected range'
+
+        # Check if testing reconstruction  is within expected range
+        assert (expected_test_pc_range[0]
+                < test_pc
+                < expected_test_pc_range[1]), \
+                'test reconstruction error is outside expected range'
+
+        # Check if mini-batch time is within expected range
+        # Note: Skip first epoch since its runtime is usually an outlier
+        mini_batch_times = mini_batch_times[1:]
+        mini_batch_time = sum(mini_batch_times) / len(mini_batch_times)
+        assert (0.75 * expected_mini_batch_times[cluster]
+                < mini_batch_time
+                < 1.25 * expected_mini_batch_times[cluster]), \
+                'average mini-batch time is outside expected range'
+
+    # Return test function from factory function
+    func.__name__ = test_name
+    return func
+
+m_lbann_args=f"--use_data_store --preload_data_store --metadata={metadata_prototext}"
+# Create test functions that can interact with PyTest
+for _test_func in tools.create_tests(setup_experiment,
+                                     __file__,
+                                     lbann_args=[m_lbann_args],
+                                     procs_per_node=procs_per_node,
+                                     nodes=num_nodes):
+    globals()[_test_func.__name__] = augment_test_func(_test_func)
diff --git a/python/lbann/util/__init__.py b/python/lbann/util/__init__.py
index 18c53ce0ace..e6e194c2cb3 100644
--- a/python/lbann/util/__init__.py
+++ b/python/lbann/util/__init__.py
@@ -17,6 +17,9 @@ def str_list(it, sep=' '):
     """Convert an iterable object to a string."""
     return sep.join(str(i) for i in make_iterable(it))
 
+def list2str(l):
+    return ' '.join(l)
+
 def make_nd_array(*dims):
     """Create a multi-dimensional array with given dimensions.
 

From a7ba66efa3785439399ad864596018e285d43bde Mon Sep 17 00:00:00 2001
From: "Brian C. Van Essen" <vanessen1@llnl.gov>
Date: Thu, 11 Nov 2021 16:05:59 -0800
Subject: [PATCH 35/37] Updated the train_macc_surrogate application to have an
 integrated test.

---
 .../physics/ICF/macc_trainable_models.py      | 105 +++++++++
 applications/physics/ICF/pre_train_jag_wae.py |  14 +-
 .../physics/ICF/train_macc_surrogate.py       | 153 +++----------
 .../test_integration_pre_train_jag_wae.py     |  12 +-
 .../test_integration_train_macc_surrogate.py  | 203 ++++++++++++++++++
 5 files changed, 359 insertions(+), 128 deletions(-)
 create mode 100644 bamboo/integration_tests/test_integration_train_macc_surrogate.py

diff --git a/applications/physics/ICF/macc_trainable_models.py b/applications/physics/ICF/macc_trainable_models.py
index 10ef03ba511..e9bd563f96c 100644
--- a/applications/physics/ICF/macc_trainable_models.py
+++ b/applications/physics/ICF/macc_trainable_models.py
@@ -79,3 +79,108 @@ def construct_jag_wae_model(ydim,
                        metrics=metrics,
                        objective_function=obj,
                        callbacks=callbacks)
+
+def construct_macc_surrogate_model(xdim,
+                                   ydim,
+                                   zdim,
+                                   wae_mcf,
+                                   surrogate_mcf,
+                                   lambda_cyc,
+                                   useCNN,
+                                   dump_models,
+                                   pretrained_dir,
+                                   ltfb_batch_interval,
+                                   num_epochs
+                                   ):
+    """Construct MACC surrogate model.
+
+    See https://arxiv.org/pdf/1912.08113.pdf for model architecture and other details
+
+    """
+    # Layer graph
+    input = lbann.Input(data_field='samples',name='inp_data')
+    # data is 64*64*4 images + 15 scalar + 5 param
+    inp_slice = lbann.Slice(input, axis=0, slice_points=str_list([0,ydim,ydim+xdim]),name='inp_slice')
+    gt_y = lbann.Identity(inp_slice,name='gt_y')
+    gt_x = lbann.Identity(inp_slice, name='gt_x') #param not used
+
+    zero  = lbann.Constant(value=0.0,num_neurons='1',name='zero')
+    one  = lbann.Constant(value=1.0,num_neurons='1',name='one')
+
+
+    z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="20")
+    wae = macc_network_architectures.MACCWAE(zdim,ydim,cf=wae_mcf,use_CNN=useCNN) #pretrained, freeze
+    inv = macc_network_architectures.MACCInverse(xdim,cf=surrogate_mcf)
+    fwd = macc_network_architectures.MACCForward(zdim,cf=surrogate_mcf)
+
+
+    y_pred_fwd = wae.encoder(gt_y)
+
+    param_pred_ = wae.encoder(gt_y)
+    input_fake = inv(param_pred_)
+
+    output_cyc = fwd(input_fake)
+    y_image_re2  = wae.decoder(output_cyc)
+
+    '''**** Train cycleGAN input params <--> latent space of (images, scalars) ****'''
+    output_fake = fwd(gt_x)
+    y_image_re = wae.decoder(output_fake)
+
+    param_pred2_ = wae.encoder(y_image_re)
+    input_cyc = inv(param_pred2_)
+
+    L_l2_x =  lbann.MeanSquaredError(input_fake,gt_x)
+    L_cyc_x = lbann.MeanSquaredError(input_cyc,gt_x)
+
+    L_l2_y =  lbann.MeanSquaredError(output_fake,y_pred_fwd)
+    L_cyc_y = lbann.MeanSquaredError(output_cyc,y_pred_fwd)
+
+
+    #@todo slice here to separate scalar from image
+    img_sca_loss = lbann.MeanSquaredError(y_image_re,gt_y)
+    #L_cyc = L_cyc_y + L_cyc_x
+    L_cyc = lbann.Add(L_cyc_y, L_cyc_x)
+
+    #loss_gen0  = L_l2_y + lambda_cyc*L_cyc
+    loss_gen0  = lbann.WeightedSum([L_l2_y,L_cyc], scaling_factors=f'1 {lambda_cyc}')
+    loss_gen1  = lbann.WeightedSum([L_l2_x,L_cyc_y], scaling_factors=f'1 {lambda_cyc}')
+    #loss_gen1  =  L_l2_x + lambda_cyc*L_cyc_y
+
+
+    layers = list(lbann.traverse_layer_graph(input))
+    weights = set()
+    #Freeze appropriate (pretrained) weights
+    pretrained_models = ["wae"]  #add macc?
+    for l in layers:
+      for idx in range(len(pretrained_models)):
+        if(l.weights and pretrained_models[idx] in l.name):
+          for w in range(len(l.weights)):
+            l.weights[w].optimizer = lbann.NoOptimizer()
+      weights.update(l.weights)
+
+    l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)
+    #d_adv_bce = lbann.LayerTerm(d_adv_bce,scale=0.01)
+    # Setup objective function
+    obj = lbann.ObjectiveFunction([loss_gen0,loss_gen1,l2_reg])
+    # Initialize check metric callback
+    metrics = [lbann.Metric(img_sca_loss, name='fw_loss'),
+               lbann.Metric(L_l2_x, name='inverse loss'),
+               lbann.Metric(L_cyc_y, name='output cycle loss'),
+               lbann.Metric(L_cyc_x, name='param cycle loss')]
+
+    callbacks = [lbann.CallbackPrint(),
+                 lbann.CallbackSaveModel(dir=dump_models),
+                 lbann.CallbackLoadModel(dirs=str(pretrained_dir)),
+                 lbann.CallbackTimer()]
+
+    if(ltfb_batch_interval > 0) :
+      callbacks.append(lbann.CallbackLTFB(batch_interval=ltfb_batch_interval,metric='fw_loss',
+                                    low_score_wins=True,
+                                    exchange_hyperparameters=True))
+    # Construct model
+    return lbann.Model(num_epochs,
+                       weights=weights,
+                       layers=layers,
+                       metrics=metrics,
+                       objective_function=obj,
+                       callbacks=callbacks)
diff --git a/applications/physics/ICF/pre_train_jag_wae.py b/applications/physics/ICF/pre_train_jag_wae.py
index 1cb424d69cc..cd3805e2161 100644
--- a/applications/physics/ICF/pre_train_jag_wae.py
+++ b/applications/physics/ICF/pre_train_jag_wae.py
@@ -50,10 +50,10 @@
     help='use CNN', metavar='BOOL')
 parser.add_argument(
     '--sample-list-train', action='store', default='/p/vast1/lbann/datasets/JAG/10MJAG/1M_A/index.txt', type=str,
-    help='index list (default index.txt)', metavar='NAME')
+    help='sample list (default index.txt)', metavar='NAME')
 parser.add_argument(
     '--sample-list-test', action='store', default='/p/vast1/lbann/datasets/JAG/10MJAG/1M_B/t0_sample_list_multi_10K.txt', type=str,
-    help='index list (default t0_sample_list_multi_10K.txt, 100 samples)', metavar='NAME')
+    help='sample list (default t0_sample_list_multi_10K.txt, 100 samples)', metavar='NAME')
 parser.add_argument(
     '--dump-outputs', action='store', default='dump_outs', type=str,
     help='dump outputs dir (default: jobdir/dump_outs)', metavar='NAME')
@@ -73,7 +73,13 @@
 
     trainer = lbann.Trainer(mini_batch_size=args.mini_batch_size,
                             serialize_io=True)
-    model = macc_trainable_models.construct_jag_wae_model(args.ydim, args.zdim, args.mcf, args.useCNN, args.dump_models, args.ltfb_batch_interval, args.num_epochs)
+    model = macc_trainable_models.construct_jag_wae_model(ydim=args.ydim,
+                                                          zdim=args.zdim,
+                                                          mcf=args.mcf,
+                                                          useCNN=args.useCNN,
+                                                          dump_models=args.dump_models,
+                                                          ltfb_batch_interval=args.ltfb_batch_interval,
+                                                          num_epochs=args.num_epochs)
     # Setup optimizer
     opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
     # Load data reader from prototext
@@ -91,7 +97,7 @@
                        job_name=args.job_name,
                        lbann_args=['--use_data_store --preload_data_store',
                                    f'--metadata={metadata_prototext}',
-                                   f'--data_reader_percent=0.01',
+                                   f'--data_reader_percent=0.1',
                                    f'--sample_list_train={args.sample_list_train}',
                                    f'--sample_list_test={args.sample_list_test}',
                                    f'--procs_per_trainer={args.procs_per_trainer}'],
diff --git a/applications/physics/ICF/train_macc_surrogate.py b/applications/physics/ICF/train_macc_surrogate.py
index f633d1fac9a..6151f6bc79b 100644
--- a/applications/physics/ICF/train_macc_surrogate.py
+++ b/applications/physics/ICF/train_macc_surrogate.py
@@ -1,4 +1,4 @@
-import macc_models
+import macc_trainable_models
 import argparse
 from os.path import abspath, dirname, join
 import google.protobuf.text_format as txtf
@@ -59,23 +59,17 @@
     '--surrogate_mcf', action='store', default=1, type=int,
     help='model capacity factor (default: 1)', metavar='NUM')
 parser.add_argument(
-    '--lamda-cyc', action='store', default=1e-3, type=float,
-    help='lamda-cyc (default: 1e-3)', metavar='NUM')
+    '--lambda-cyc', action='store', default=1e-3, type=float,
+    help='lambda-cyc (default: 1e-3)', metavar='NUM')
 parser.add_argument(
     '--useCNN', action='store', default=False, type=bool,
     help='use CNN', metavar='BOOL')
 parser.add_argument(
-    '--data-filedir-train', action='store', default='/p/gpfs1/brainusr/datasets/10MJAG/1M_A/', type=str,
-    help='data filedir (default train dir is 10MJAG/1M_A)', metavar='NAME')
+    '--sample-list-train', action='store', default='/p/vast1/lbann/datasets/JAG/10MJAG/1M_A/index.txt', type=str,
+    help='sample list (default index.txt)', metavar='NAME')
 parser.add_argument(
-    '--data-filedir-test', action='store', default='/p/gpfs1/brainusr/datasets/10MJAG/1M_B/', type=str,
-    help='data filedir (default test dir is 10MJAG/1M_B)', metavar='NAME')
-parser.add_argument(
-    '--index-list-train', action='store', default='index.txt', type=str,
-    help='index list (default index.txt)', metavar='NAME')
-parser.add_argument(
-    '--index-list-test', action='store', default='t0_sample_list_multi_10K.txt', type=str,
-    help='index list (default t0_sample_list_multi_10K.txt, 100 samples)', metavar='NAME')
+    '--sample-list-test', action='store', default='/p/vast1/lbann/datasets/JAG/10MJAG/1M_B/t0_sample_list_multi_10K.txt', type=str,
+    help='sample list (default t0_sample_list_multi_10K.txt, 100 samples)', metavar='NAME')
 parser.add_argument(
     '--dump-outputs', action='store', default='dump_outs', type=str,
     help='dump outputs dir (default: jobdir/dump_outs)', metavar='NAME')
@@ -99,109 +93,23 @@
 def list2str(l):
     return ' '.join(l)
 
-def construct_model():
-    """Construct MACC surrogate model.
-
-    See https://arxiv.org/pdf/1912.08113.pdf model architecture and other details
-
-    """
-    import lbann
-
-    # Layer graph
-    input = lbann.Input(data_field='samples',name='inp_data')
-    # data is 64*64*4 images + 15 scalar + 5 param
-    inp_slice = lbann.Slice(input, axis=0, slice_points=str_list([0,args.ydim,args.ydim+args.xdim]),name='inp_slice')
-    gt_y = lbann.Identity(inp_slice,name='gt_y')
-    gt_x = lbann.Identity(inp_slice, name='gt_x') #param not used
-
-    zero  = lbann.Constant(value=0.0,num_neurons='1',name='zero')
-    one  = lbann.Constant(value=1.0,num_neurons='1',name='one')
-
-
-    z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="20")
-    wae = macc_models.MACCWAE(args.zdim,args.ydim,cf=args.wae_mcf,use_CNN=args.useCNN) #pretrained, freeze
-    inv = macc_models.MACCInverse(args.xdim,cf=args.surrogate_mcf)
-    fwd = macc_models.MACCForward(args.zdim,cf=args.surrogate_mcf)
-
-
-    y_pred_fwd = wae.encoder(gt_y)
-
-    param_pred_ = wae.encoder(gt_y)
-    input_fake = inv(param_pred_)
-
-    output_cyc = fwd(input_fake)
-    y_image_re2  = wae.decoder(output_cyc)
-
-    '''**** Train cycleGAN input params <--> latent space of (images, scalars) ****'''
-    output_fake = fwd(gt_x)
-    y_image_re = wae.decoder(output_fake)
-
-    param_pred2_ = wae.encoder(y_image_re)
-    input_cyc = inv(param_pred2_)
-
-    L_l2_x =  lbann.MeanSquaredError(input_fake,gt_x)
-    L_cyc_x = lbann.MeanSquaredError(input_cyc,gt_x)
-
-    L_l2_y =  lbann.MeanSquaredError(output_fake,y_pred_fwd)
-    L_cyc_y = lbann.MeanSquaredError(output_cyc,y_pred_fwd)
-
-
-    #@todo slice here to separate scalar from image
-    img_sca_loss = lbann.MeanSquaredError(y_image_re,gt_y)
-    #L_cyc = L_cyc_y + L_cyc_x
-    L_cyc = lbann.Add(L_cyc_y, L_cyc_x)
-
-    #loss_gen0  = L_l2_y + lamda_cyc*L_cyc
-    loss_gen0  = lbann.WeightedSum([L_l2_y,L_cyc], scaling_factors=f'1 {args.lamda_cyc}')
-    loss_gen1  = lbann.WeightedSum([L_l2_x,L_cyc_y], scaling_factors=f'1 {args.lamda_cyc}')
-    #loss_gen1  =  L_l2_x + lamda_cyc*L_cyc_y
-
-
-    layers = list(lbann.traverse_layer_graph(input))
-    weights = set()
-    #Freeze appropriate (pretrained) weights
-    pretrained_models = ["wae"]  #add macc?
-    for l in layers:
-      for idx in range(len(pretrained_models)):
-        if(l.weights and pretrained_models[idx] in l.name):
-          for w in range(len(l.weights)):
-            l.weights[w].optimizer = lbann.NoOptimizer()
-      weights.update(l.weights)
-
-    l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)
-    #d_adv_bce = lbann.LayerTerm(d_adv_bce,scale=0.01)
-    # Setup objective function
-    obj = lbann.ObjectiveFunction([loss_gen0,loss_gen1,l2_reg])
-    # Initialize check metric callback
-    metrics = [lbann.Metric(img_sca_loss, name='fw_loss'),
-               lbann.Metric(L_l2_x, name='inverse loss'),
-               lbann.Metric(L_cyc_y, name='output cycle loss'),
-               lbann.Metric(L_cyc_x, name='param cycle loss')]
-
-    callbacks = [lbann.CallbackPrint(),
-                 lbann.CallbackSaveModel(dir=args.dump_models),
-                 lbann.CallbackLoadModel(dirs=str(args.pretrained_dir)),
-                 lbann.CallbackTimer()]
-
-    if(args.ltfb_batch_interval > 0) :
-      callbacks.append(lbann.CallbackLTFB(batch_interval=args.ltfb_batch_interval,metric='fw_loss',
-                                    low_score_wins=True,
-                                    exchange_hyperparameters=True))
-    # Construct model
-    return lbann.Model(args.num_epochs,
-                       weights=weights,
-                       serialize_io=True,
-                       layers=layers,
-                       metrics=metrics,
-                       objective_function=obj,
-                       callbacks=callbacks)
-
 
 if __name__ == '__main__':
     import lbann
 
-    trainer = lbann.Trainer(mini_batch_size=args.mini_batch_size)
-    model = construct_model()
+    trainer = lbann.Trainer(mini_batch_size=args.mini_batch_size,
+                            serialize_io=True)
+    model = macc_trainable_models.construct_macc_surrogate_model(xdim=args.xdim,
+                                                                 ydim=args.ydim,
+                                                                 zdim=args.zdim,
+                                                                 wae_mcf=args.wae_mcf,
+                                                                 surrogate_mcf=args.surrogate_mcf,
+                                                                 lambda_cyc=args.lambda_cyc,
+                                                                 useCNN=args.useCNN,
+                                                                 dump_models=args.dump_models,
+                                                                 pretrained_dir=args.pretrained_dir,
+                                                                 ltfb_batch_interval=args.ltfb_batch_interval,
+                                                                 num_epochs=args.num_epochs)
     # Setup optimizer
     opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
     # Load data reader from prototext
@@ -212,19 +120,22 @@ def construct_model():
 
     kwargs = lbann.contrib.args.get_scheduler_kwargs(args)
     status = lbann.contrib.launcher.run(trainer,model, data_reader_proto, opt,
-                       scheduler='lsf',
+#                       scheduler='lsf',
                        nodes=args.num_nodes,
-                       procs_per_node=args.ppn,
-                       partition='pbatch',
+#                       procs_per_node=args.ppn,
+#                       partition='pbatch',
                        time_limit=480,
-                       setup_only=False,
+#                       setup_only=False,
                        job_name=args.job_name,
                        lbann_args=['--preload_data_store --use_data_store',
                                    f'--metadata={metadata_prototext}',
-                                   f'--index_list_train={args.index_list_train}',
-                                   f'--index_list_test={args.index_list_test}',
-                                   f'--data_filedir_train={args.data_filedir_train}',
-                                   f'--data_filedir_test={args.data_filedir_test}',
-                                   f'--procs_per_trainer={run_args.procs_per_trainer}'],
+                                   f'--data_reader_percent=0.1',
+                                   f'--sample_list_train={args.sample_list_train}',
+                                   f'--sample_list_test={args.sample_list_test}',
+#                                   f'--index_list_train={args.index_list_train}',
+#                                   f'--index_list_test={args.index_list_test}',
+#                                   f'--data_filedir_train={args.data_filedir_train}',
+#                                   f'--data_filedir_test={args.data_filedir_test}',
+                                   f'--procs_per_trainer={args.procs_per_trainer}'],
                                    **kwargs)
     print(status)
diff --git a/bamboo/integration_tests/test_integration_pre_train_jag_wae.py b/bamboo/integration_tests/test_integration_pre_train_jag_wae.py
index d6755e3acb1..b2a8c958dc7 100644
--- a/bamboo/integration_tests/test_integration_pre_train_jag_wae.py
+++ b/bamboo/integration_tests/test_integration_pre_train_jag_wae.py
@@ -91,7 +91,13 @@ def setup_experiment(lbann):
     import macc_trainable_models
     dump_models = 'dump_models'
     ltfb_batch_interval = 0
-    model = macc_trainable_models.construct_jag_wae_model(ydim, zdim, mcf, useCNN, dump_models, ltfb_batch_interval, num_epochs)
+    model = macc_trainable_models.construct_jag_wae_model(ydim=ydim,
+                                                          zdim=zdim,
+                                                          mcf=mcf,
+                                                          useCNN=useCNN,
+                                                          dump_models=dump_models,
+                                                          ltfb_batch_interval=ltfb_batch_interval,
+                                                          num_epochs=num_epochs)
 
     # Setup optimizer
     opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
@@ -131,8 +137,8 @@ def augment_test_func(test_func):
     # Define test function
     def func(cluster, dirname,weekly):
 
-        if not weekly:
-            pytest.skip('This app runs {} with weekly builds only'.format(test_name))
+#        if not weekly:
+#            pytest.skip('This app runs {} with weekly builds only'.format(test_name))
 
         # Run LBANN experiment
         experiment_output = test_func(cluster, dirname)
diff --git a/bamboo/integration_tests/test_integration_train_macc_surrogate.py b/bamboo/integration_tests/test_integration_train_macc_surrogate.py
new file mode 100644
index 00000000000..4cd441ed860
--- /dev/null
+++ b/bamboo/integration_tests/test_integration_train_macc_surrogate.py
@@ -0,0 +1,203 @@
+import functools
+import operator
+import os
+import os.path
+import re
+import sys
+import numpy as np
+import google.protobuf.text_format
+import pytest
+from os.path import abspath, dirname, join, realpath
+import tools
+
+# Local files
+current_file = realpath(__file__)
+lbann_dir = dirname(os.path.dirname(os.path.dirname(current_file)))
+app_path = join(lbann_dir, 'applications', 'physics','ICF')
+sys.path.append(app_path)
+
+# ==============================================
+# Options
+# ==============================================
+
+# Training options
+num_epochs = 10
+mini_batch_size = 128
+num_nodes = 1
+procs_per_node = 2 # Only use 2 GPUs to ensure comparable testing between lassen and pascal
+                   # this model is very sensitive to differences in how it is initialized
+                   # and parallelized
+
+model_zoo_dir = dirname(app_path)
+data_reader_prototext = join(model_zoo_dir,
+                             'data',
+                             'jag_conduit_reader.prototext')
+metadata_prototext = join(model_zoo_dir,
+                             'data',
+                             'jag_100M_metadata.prototext')
+
+xdim = 5 # input (x) dim (default: 5)
+ydim = 16399 # image+scalar dim (default: 64*64*4+15=16399)
+zdim = 20 # latent space dim (default: 20)
+wae_mcf = 1 # model capacity factor (default: 1)
+surrogate_mcf = 1 # model capacity factor (default: 1)
+lambda_cyc = 1e-3 # lambda-cyc (default: 1e-3)
+
+useCNN = False
+
+# Reconstruction loss
+expected_train_range = (0.66, 0.68)
+expected_test_range = (0.83, 0.84)
+
+# Average mini-batch time (in sec) for each LC system
+# Note that run times are with LBANN_DETERMINISTIC set
+# Commented out times are prior to thread safe RNGs
+expected_mini_batch_times = {
+    'lassen':   0.0530066,
+    'pascal':   0.123863,
+}
+# ==============================================
+# Setup LBANN experiment
+# ==============================================
+
+def make_data_reader(lbann):
+    """Make Protobuf message for the JAG conduit data reader.
+
+    """
+    import lbann.contrib.lc.paths
+
+    # Load data readers from prototext
+    message = lbann.lbann_pb2.LbannPB()
+    with open(data_reader_prototext, 'r') as f:
+        google.protobuf.text_format.Merge(f.read(), message)
+    message = message.data_reader
+
+    # Use less training data for the integration test
+    message.reader[0].percent_of_data_to_use = 0.01
+
+    # Set paths
+    return message
+
+def setup_experiment(lbann):
+    """Construct LBANN experiment.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+    if tools.system(lbann) != 'lassen' and tools.system(lbann) != 'pascal':
+      message = f'{os.path.basename(__file__)} is only supported on lassen and pascal systems'
+      print('Skip - ' + message)
+      pytest.skip(message)
+
+    trainer = lbann.Trainer(mini_batch_size=mini_batch_size,
+                            serialize_io=True)
+    import macc_trainable_models
+    dump_models = 'dump_models'
+    ltfb_batch_interval = 0
+    pretrained_dir = ' '
+    model = macc_trainable_models.construct_macc_surrogate_model(xdim=xdim,
+                                                                 ydim=ydim,
+                                                                 zdim=zdim,
+                                                                 wae_mcf=wae_mcf,
+                                                                 surrogate_mcf=surrogate_mcf,
+                                                                 lambda_cyc=lambda_cyc,
+                                                                 useCNN=useCNN,
+                                                                 dump_models=dump_models,
+                                                                 pretrained_dir=pretrained_dir,
+                                                                 ltfb_batch_interval=ltfb_batch_interval,
+                                                                 num_epochs=num_epochs)
+ 
+    # Setup optimizer
+    opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
+    # Load data reader from prototext
+    data_reader = make_data_reader(lbann)
+
+    return trainer, model, data_reader, opt
+
+# ==============================================
+# Setup PyTest
+# ==============================================
+
+def augment_test_func(test_func):
+    """Augment test function to parse log files.
+
+    `tools.create_tests` creates functions that run an LBANN
+    experiment. This function creates augmented functions that parse
+    the log files after LBANN finishes running, e.g. to check metrics
+    or runtimes.
+
+    Note: The naive approach is to define the augmented test functions
+    in a loop. However, Python closures are late binding. In other
+    words, the function would be overwritten every time we define it.
+    We get around this overwriting problem by defining the augmented
+    function in the local scope of another function.
+
+    Args:
+        test_func (function): Test function created by
+            `tools.create_tests`.
+
+    Returns:
+        function: Test that can interact with PyTest.
+
+    """
+    test_name = test_func.__name__
+
+    # Define test function
+    def func(cluster, dirname,weekly):
+
+#        if not weekly:
+#            pytest.skip('This app runs {} with weekly builds only'.format(test_name))
+
+        # Run LBANN experiment
+        experiment_output = test_func(cluster, dirname)
+
+        # Parse LBANN log file
+        train_pc = None
+        test_pc = None
+        mini_batch_times = []
+        with open(experiment_output['stdout_log_file']) as f:
+            for line in f:
+                match = re.search('training epoch [0-9]+ output cycle loss : ([0-9.]+)', line)
+                if match:
+                    train_pc = float(match.group(1))
+                match = re.search('test output cycle loss : ([0-9.]+)', line)
+                if match:
+                    test_pc = float(match.group(1))
+                match = re.search('training epoch [0-9]+ mini-batch time statistics : ([0-9.]+)s mean', line)
+                if match:
+                    mini_batch_times.append(float(match.group(1)))
+
+        # Check if training reconstruction is within expected range
+        assert (expected_train_range[0]
+                < train_pc
+                < expected_train_range[1]), \
+                'train reconstruction error is outside expected range'
+
+        # Check if testing reconstruction  is within expected range
+        assert (expected_test_range[0]
+                < test_pc
+                < expected_test_range[1]), \
+                'test reconstruction error is outside expected range'
+
+        # Check if mini-batch time is within expected range
+        # Note: Skip first epoch since its runtime is usually an outlier
+        mini_batch_times = mini_batch_times[1:]
+        mini_batch_time = sum(mini_batch_times) / len(mini_batch_times)
+        assert (0.75 * expected_mini_batch_times[cluster]
+                < mini_batch_time
+                < 1.25 * expected_mini_batch_times[cluster]), \
+                'average mini-batch time is outside expected range'
+
+    # Return test function from factory function
+    func.__name__ = test_name
+    return func
+
+m_lbann_args=f"--use_data_store --preload_data_store --metadata={metadata_prototext}"
+# Create test functions that can interact with PyTest
+for _test_func in tools.create_tests(setup_experiment,
+                                     __file__,
+                                     lbann_args=[m_lbann_args],
+                                     procs_per_node=procs_per_node,
+                                     nodes=num_nodes):
+    globals()[_test_func.__name__] = augment_test_func(_test_func)

From 28ed6f0058ddf1735d15a4bdc64b6f05ebf00eab Mon Sep 17 00:00:00 2001
From: "Brian C. Van Essen" <vanessen1@llnl.gov>
Date: Fri, 12 Nov 2021 11:51:29 -0800
Subject: [PATCH 36/37] Added an integration test for the older JAG model. 
 Refactored the model definition into a separate file for testing.  Renamed
 the MACC models file.

---
 ...models.py => jag_network_architectures.py} |   0
 .../physics/ICF/jag_trainable_models.py       |  72 +++++++
 ...acc_trainable_models.py => macc_models.py} |   0
 applications/physics/ICF/pre_train_jag_wae.py |  19 +-
 applications/physics/ICF/train_jag_wae.py     |  83 +-------
 .../physics/ICF/train_macc_surrogate.py       |  33 ++-
 .../test_integration_jag_wae.py               | 188 ++++++++++++++++++
 ...=> test_integration_pre_train_macc_wae.py} |  16 +-
 .../test_integration_train_macc_surrogate.py  |  24 +--
 9 files changed, 310 insertions(+), 125 deletions(-)
 rename applications/physics/ICF/{jag_models.py => jag_network_architectures.py} (100%)
 create mode 100644 applications/physics/ICF/jag_trainable_models.py
 rename applications/physics/ICF/{macc_trainable_models.py => macc_models.py} (100%)
 create mode 100644 bamboo/integration_tests/test_integration_jag_wae.py
 rename bamboo/integration_tests/{test_integration_pre_train_jag_wae.py => test_integration_pre_train_macc_wae.py} (91%)

diff --git a/applications/physics/ICF/jag_models.py b/applications/physics/ICF/jag_network_architectures.py
similarity index 100%
rename from applications/physics/ICF/jag_models.py
rename to applications/physics/ICF/jag_network_architectures.py
diff --git a/applications/physics/ICF/jag_trainable_models.py b/applications/physics/ICF/jag_trainable_models.py
new file mode 100644
index 00000000000..b845d7c2862
--- /dev/null
+++ b/applications/physics/ICF/jag_trainable_models.py
@@ -0,0 +1,72 @@
+import lbann
+import jag_network_architectures
+from lbann.util import str_list, list2str
+
+def construct_jag_wae_model(y_dim,
+                            z_dim,
+#                            mcf,
+#                            useCNN,
+#                            dump_models,
+#                            ltfb_batch_interval,
+                            num_epochs
+                            ):
+    """Construct LBANN model.
+
+    JAG Wasserstein autoencoder  model
+
+    """
+    import lbann
+
+    # Layer graph
+    input = lbann.Input(data_field='samples',name='inp_data')
+    # data is 64*64*4 images + 15 scalar + 5 param
+    inp_slice = lbann.Slice(input, axis=0, slice_points="0 16399 16404",name='inp_slice')
+    gt_y = lbann.Identity(inp_slice,name='gt_y')
+    gt_x = lbann.Identity(inp_slice, name='gt_x') #param not used
+
+    zero  = lbann.Constant(value=0.0,num_neurons='1',name='zero')
+    one  = lbann.Constant(value=1.0,num_neurons='1',name='one')
+
+    z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="20")
+    d1_real, d1_fake, d_adv, pred_y  = jag_network_architectures.WAE(z_dim,y_dim)(z,gt_y)
+
+    d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real,one],name='d1_real_bce')
+    d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake,zero],name='d1_fake_bce')
+    d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv,one],name='d_adv_bce')
+
+    img_loss = lbann.MeanSquaredError([pred_y,gt_y])
+    rec_error = lbann.L2Norm2(lbann.WeightedSum([pred_y,gt_y], scaling_factors="1 -1"))
+
+    layers = list(lbann.traverse_layer_graph(input))
+    # Setup objective function
+    weights = set()
+    src_layers = []
+    dst_layers = []
+    for l in layers:
+      if(l.weights and "disc0" in l.name and "instance1" in l.name):
+        src_layers.append(l.name)
+      #freeze weights in layers named "disc1"
+      if(l.weights and "disc1" in l.name):
+        dst_layers.append(l.name)
+        for idx in range(len(l.weights)):
+          l.weights[idx].optimizer = lbann.NoOptimizer()
+      weights.update(l.weights)
+    l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)
+    d_adv_bce = lbann.LayerTerm(d_adv_bce,scale=0.01)
+    obj = lbann.ObjectiveFunction([d1_real_bce,d1_fake_bce,d_adv_bce,img_loss,rec_error,l2_reg])
+    # Initialize check metric callback
+    metrics = [lbann.Metric(img_loss, name='recon_error')]
+
+    callbacks = [lbann.CallbackPrint(),
+                 lbann.CallbackTimer(),
+                 lbann.CallbackReplaceWeights(source_layers=list2str(src_layers),
+                                      destination_layers=list2str(dst_layers),
+                                      batch_interval=2)]
+
+    # Construct model
+    return lbann.Model(num_epochs,
+                       weights=weights,
+                       layers=layers,
+                       metrics=metrics,
+                       objective_function=obj,
+                       callbacks=callbacks)
diff --git a/applications/physics/ICF/macc_trainable_models.py b/applications/physics/ICF/macc_models.py
similarity index 100%
rename from applications/physics/ICF/macc_trainable_models.py
rename to applications/physics/ICF/macc_models.py
diff --git a/applications/physics/ICF/pre_train_jag_wae.py b/applications/physics/ICF/pre_train_jag_wae.py
index cd3805e2161..2f4c81c821d 100644
--- a/applications/physics/ICF/pre_train_jag_wae.py
+++ b/applications/physics/ICF/pre_train_jag_wae.py
@@ -1,4 +1,4 @@
-import macc_trainable_models
+import macc_models
 import argparse
 from os.path import abspath, dirname, join
 import google.protobuf.text_format as txtf
@@ -73,13 +73,13 @@
 
     trainer = lbann.Trainer(mini_batch_size=args.mini_batch_size,
                             serialize_io=True)
-    model = macc_trainable_models.construct_jag_wae_model(ydim=args.ydim,
-                                                          zdim=args.zdim,
-                                                          mcf=args.mcf,
-                                                          useCNN=args.useCNN,
-                                                          dump_models=args.dump_models,
-                                                          ltfb_batch_interval=args.ltfb_batch_interval,
-                                                          num_epochs=args.num_epochs)
+    model = macc_models.construct_jag_wae_model(ydim=args.ydim,
+                                                zdim=args.zdim,
+                                                mcf=args.mcf,
+                                                useCNN=args.useCNN,
+                                                dump_models=args.dump_models,
+                                                ltfb_batch_interval=args.ltfb_batch_interval,
+                                                num_epochs=args.num_epochs)
     # Setup optimizer
     opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
     # Load data reader from prototext
@@ -91,13 +91,10 @@
     kwargs = lbann.contrib.args.get_scheduler_kwargs(args)
     status = lbann.contrib.launcher.run(trainer,model, data_reader_proto, opt,
                        nodes=args.num_nodes,
-#                       procs_per_node=args.ppn,
                        time_limit=720,
-#                       setup_only=True,
                        job_name=args.job_name,
                        lbann_args=['--use_data_store --preload_data_store',
                                    f'--metadata={metadata_prototext}',
-                                   f'--data_reader_percent=0.1',
                                    f'--sample_list_train={args.sample_list_train}',
                                    f'--sample_list_test={args.sample_list_test}',
                                    f'--procs_per_trainer={args.procs_per_trainer}'],
diff --git a/applications/physics/ICF/train_jag_wae.py b/applications/physics/ICF/train_jag_wae.py
index 12266b58653..4469cb1241f 100644
--- a/applications/physics/ICF/train_jag_wae.py
+++ b/applications/physics/ICF/train_jag_wae.py
@@ -1,4 +1,4 @@
-import jag_models
+import jag_trainable_models
 from os.path import abspath, dirname, join
 import google.protobuf.text_format as txtf
 
@@ -12,83 +12,19 @@
                              'data',
                              'jag_100Kdata.prototext')
 
-
-def list2str(l):
-    return ' '.join(l)
-
-def construct_model():
-    """Construct LBANN model.
-
-    JAG Wasserstein autoencoder  model
-
-    """
+if __name__ == '__main__':
     import lbann
 
-    # Layer graph
-    input = lbann.Input(data_field='samples',name='inp_data')
-    # data is 64*64*4 images + 15 scalar + 5 param
-    inp_slice = lbann.Slice(input, axis=0, slice_points="0 16399 16404",name='inp_slice')
-    gt_y = lbann.Identity(inp_slice,name='gt_y')
-    gt_x = lbann.Identity(inp_slice, name='gt_x') #param not used
-
-    zero  = lbann.Constant(value=0.0,num_neurons='1',name='zero')
-    one  = lbann.Constant(value=1.0,num_neurons='1',name='one')
-
     y_dim = 16399 #image+scalar shape
     z_dim = 20  #Latent space dim
-
-    z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="20")
-    d1_real, d1_fake, d_adv, pred_y  = jag_models.WAE(z_dim,y_dim)(z,gt_y)
-
-    d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real,one],name='d1_real_bce')
-    d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake,zero],name='d1_fake_bce')
-    d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv,one],name='d_adv_bce')
-
-    img_loss = lbann.MeanSquaredError([pred_y,gt_y])
-    rec_error = lbann.L2Norm2(lbann.WeightedSum([pred_y,gt_y], scaling_factors="1 -1"))
-
-    layers = list(lbann.traverse_layer_graph(input))
-    # Setup objective function
-    weights = set()
-    src_layers = []
-    dst_layers = []
-    for l in layers:
-      if(l.weights and "disc0" in l.name and "instance1" in l.name):
-        src_layers.append(l.name)
-      #freeze weights in disc2
-      if(l.weights and "disc1" in l.name):
-        dst_layers.append(l.name)
-        for idx in range(len(l.weights)):
-          l.weights[idx].optimizer = lbann.NoOptimizer()
-      weights.update(l.weights)
-    l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)
-    d_adv_bce = lbann.LayerTerm(d_adv_bce,scale=0.01)
-    obj = lbann.ObjectiveFunction([d1_real_bce,d1_fake_bce,d_adv_bce,img_loss,rec_error,l2_reg])
-    # Initialize check metric callback
-    metrics = [lbann.Metric(img_loss, name='recon_error')]
-
-    callbacks = [lbann.CallbackPrint(),
-                 lbann.CallbackTimer(),
-                 lbann.CallbackReplaceWeights(source_layers=list2str(src_layers),
-                                      destination_layers=list2str(dst_layers),
-                                      batch_interval=2)]
-
-    # Construct model
     num_epochs = 100
-    return lbann.Model(num_epochs,
-                       weights=weights,
-                       layers=layers,
-                       metrics=metrics,
-                       objective_function=obj,
-                       callbacks=callbacks)
-
-
-if __name__ == '__main__':
-    import lbann
-
     mini_batch_size = 128
-    trainer = lbann.Trainer(mini_batch_size=mini_batch_size)
-    model = construct_model()
+    trainer = lbann.Trainer(mini_batch_size=mini_batch_size,
+                            serialize_io=True)
+#    model = construct_model()
+    model = jag_trainable_models.construct_jag_wae_model(y_dim=y_dim,
+                                               z_dim=z_dim,
+                                               num_epochs=num_epochs)
     # Setup optimizer
     opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
     # Load data reader from prototext
@@ -101,7 +37,8 @@ def construct_model():
                        scheduler='slurm',
                        nodes=1,
                        procs_per_node=1,
+                       lbann_args=[f'--data_reader_percent=0.1'],
                        time_limit=360,
-                       setup_only=True,
+#                       setup_only=True,
                        job_name='jag_wae')
     print(status)
diff --git a/applications/physics/ICF/train_macc_surrogate.py b/applications/physics/ICF/train_macc_surrogate.py
index 6151f6bc79b..2e989195386 100644
--- a/applications/physics/ICF/train_macc_surrogate.py
+++ b/applications/physics/ICF/train_macc_surrogate.py
@@ -1,4 +1,4 @@
-import macc_trainable_models
+import macc_models
 import argparse
 from os.path import abspath, dirname, join
 import google.protobuf.text_format as txtf
@@ -99,17 +99,17 @@ def list2str(l):
 
     trainer = lbann.Trainer(mini_batch_size=args.mini_batch_size,
                             serialize_io=True)
-    model = macc_trainable_models.construct_macc_surrogate_model(xdim=args.xdim,
-                                                                 ydim=args.ydim,
-                                                                 zdim=args.zdim,
-                                                                 wae_mcf=args.wae_mcf,
-                                                                 surrogate_mcf=args.surrogate_mcf,
-                                                                 lambda_cyc=args.lambda_cyc,
-                                                                 useCNN=args.useCNN,
-                                                                 dump_models=args.dump_models,
-                                                                 pretrained_dir=args.pretrained_dir,
-                                                                 ltfb_batch_interval=args.ltfb_batch_interval,
-                                                                 num_epochs=args.num_epochs)
+    model = macc_models.construct_macc_surrogate_model(xdim=args.xdim,
+                                                       ydim=args.ydim,
+                                                       zdim=args.zdim,
+                                                       wae_mcf=args.wae_mcf,
+                                                       surrogate_mcf=args.surrogate_mcf,
+                                                       lambda_cyc=args.lambda_cyc,
+                                                       useCNN=args.useCNN,
+                                                       dump_models=args.dump_models,
+                                                       pretrained_dir=args.pretrained_dir,
+                                                       ltfb_batch_interval=args.ltfb_batch_interval,
+                                                       num_epochs=args.num_epochs)
     # Setup optimizer
     opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
     # Load data reader from prototext
@@ -120,22 +120,13 @@ def list2str(l):
 
     kwargs = lbann.contrib.args.get_scheduler_kwargs(args)
     status = lbann.contrib.launcher.run(trainer,model, data_reader_proto, opt,
-#                       scheduler='lsf',
                        nodes=args.num_nodes,
-#                       procs_per_node=args.ppn,
-#                       partition='pbatch',
                        time_limit=480,
-#                       setup_only=False,
                        job_name=args.job_name,
                        lbann_args=['--preload_data_store --use_data_store',
                                    f'--metadata={metadata_prototext}',
-                                   f'--data_reader_percent=0.1',
                                    f'--sample_list_train={args.sample_list_train}',
                                    f'--sample_list_test={args.sample_list_test}',
-#                                   f'--index_list_train={args.index_list_train}',
-#                                   f'--index_list_test={args.index_list_test}',
-#                                   f'--data_filedir_train={args.data_filedir_train}',
-#                                   f'--data_filedir_test={args.data_filedir_test}',
                                    f'--procs_per_trainer={args.procs_per_trainer}'],
                                    **kwargs)
     print(status)
diff --git a/bamboo/integration_tests/test_integration_jag_wae.py b/bamboo/integration_tests/test_integration_jag_wae.py
new file mode 100644
index 00000000000..bdf43e5ec2a
--- /dev/null
+++ b/bamboo/integration_tests/test_integration_jag_wae.py
@@ -0,0 +1,188 @@
+import functools
+import operator
+import os
+import os.path
+import re
+import sys
+import numpy as np
+import google.protobuf.text_format
+import pytest
+from os.path import abspath, dirname, join, realpath
+import tools
+
+# Local files
+current_file = realpath(__file__)
+lbann_dir = dirname(os.path.dirname(os.path.dirname(current_file)))
+app_path = join(lbann_dir, 'applications', 'physics','ICF')
+sys.path.append(app_path)
+
+# ==============================================
+# Options
+# ==============================================
+
+# Training options
+num_epochs = 10
+mini_batch_size = 128
+num_nodes = 1
+procs_per_node = 2 # Only use 2 GPUs to ensure comparable testing between lassen and pascal
+                   # this model is very sensitive to differences in how it is initialized
+                   # and parallelized
+
+model_zoo_dir = dirname(app_path)
+data_reader_prototext = join(model_zoo_dir,
+                             'data',
+                             'jag_conduit_reader.prototext')
+metadata_prototext = join(model_zoo_dir,
+                             'data',
+                             'jag_100M_metadata.prototext')
+
+y_dim = 16399 # image+scalar dim (default: 64*64*4+15=16399)
+z_dim = 20 # latent space dim (default: 20)
+
+# Reconstruction loss
+expected_train_pc_range = (20.1, 20.2)
+expected_test_pc_range = (19.3, 19.4)
+
+# Average mini-batch time (in sec) for each LC system
+# Note that run times are with LBANN_DETERMINISTIC set
+# Commented out times are prior to thread safe RNGs
+expected_mini_batch_times = {
+    'lassen':   0.0530066,
+    'pascal':   0.11119,
+}
+# ==============================================
+# Setup LBANN experiment
+# ==============================================
+
+def make_data_reader(lbann):
+    """Make Protobuf message for JAG data reader.
+
+    """
+    import lbann.contrib.lc.paths
+
+    # Load data readers from prototext
+    message = lbann.lbann_pb2.LbannPB()
+    with open(data_reader_prototext, 'r') as f:
+        google.protobuf.text_format.Merge(f.read(), message)
+    message = message.data_reader
+
+    # Use less training data for the integration test
+    message.reader[0].percent_of_data_to_use = 0.01
+
+    # Paths from the prototext are used as-is
+    return message
+
+def setup_experiment(lbann):
+    """Construct LBANN experiment.
+
+    Args:
+        lbann (module): Module for LBANN Python frontend
+
+    """
+    if tools.system(lbann) != 'lassen' and tools.system(lbann) != 'pascal':
+      message = f'{os.path.basename(__file__)} is only supported on lassen and pascal systems'
+      print('Skip - ' + message)
+      pytest.skip(message)
+
+    trainer = lbann.Trainer(mini_batch_size=mini_batch_size,
+                            serialize_io=True)
+    import jag_trainable_models
+    dump_models = 'dump_models'
+    ltfb_batch_interval = 0
+    model = jag_trainable_models.construct_jag_wae_model(y_dim=y_dim,
+                                               z_dim=z_dim,
+                                               num_epochs=num_epochs)
+
+    # Setup optimizer
+    opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
+    # Load data reader from prototext
+    data_reader = make_data_reader(lbann)
+
+    return trainer, model, data_reader, opt
+
+# ==============================================
+# Setup PyTest
+# ==============================================
+
+def augment_test_func(test_func):
+    """Augment test function to parse log files.
+
+    `tools.create_tests` creates functions that run an LBANN
+    experiment. This function creates augmented functions that parse
+    the log files after LBANN finishes running, e.g. to check metrics
+    or runtimes.
+
+    Note: The naive approach is to define the augmented test functions
+    in a loop. However, Python closures are late binding, so every
+    function defined in the loop would end up calling the last
+    `test_func`. We avoid this by defining the augmented function in
+    the local scope of another function, which binds `test_func` per call.
+
+    Args:
+        test_func (function): Test function created by
+            `tools.create_tests`.
+
+    Returns:
+        function: Test that can interact with PyTest.
+
+    """
+    test_name = test_func.__name__
+
+    # Define test function
+    def func(cluster, dirname,weekly):
+
+#        if not weekly:
+#            pytest.skip('This app runs {} with weekly builds only'.format(test_name))
+
+        # Run LBANN experiment
+        experiment_output = test_func(cluster, dirname)
+
+        # Parse LBANN log file
+        train_pc = None
+        test_pc = None
+        mini_batch_times = []
+        with open(experiment_output['stdout_log_file']) as f:
+            for line in f:
+                match = re.search('training epoch [0-9]+ recon_error : ([0-9.]+)', line)
+                if match:
+                    train_pc = float(match.group(1))
+                match = re.search('test recon_error : ([0-9.]+)', line)
+                if match:
+                    test_pc = float(match.group(1))
+                match = re.search('training epoch [0-9]+ mini-batch time statistics : ([0-9.]+)s mean', line)
+                if match:
+                    mini_batch_times.append(float(match.group(1)))
+
+        # Check if training reconstruction is within expected range
+        assert (expected_train_pc_range[0]
+                < train_pc
+                < expected_train_pc_range[1]), \
+                'train reconstruction error is outside expected range'
+
+        # Check if testing reconstruction  is within expected range
+        assert (expected_test_pc_range[0]
+                < test_pc
+                < expected_test_pc_range[1]), \
+                'test reconstruction error is outside expected range'
+
+        # Check if mini-batch time is within expected range
+        # Note: Skip first epoch since its runtime is usually an outlier
+        mini_batch_times = mini_batch_times[1:]
+        mini_batch_time = sum(mini_batch_times) / len(mini_batch_times)
+        assert (0.75 * expected_mini_batch_times[cluster]
+                < mini_batch_time
+                < 1.25 * expected_mini_batch_times[cluster]), \
+                'average mini-batch time is outside expected range'
+
+    # Return test function from factory function
+    func.__name__ = test_name
+    return func
+
+m_lbann_args=f"--use_data_store --preload_data_store --metadata={metadata_prototext}"
+# Create test functions that can interact with PyTest
+for _test_func in tools.create_tests(setup_experiment,
+                                     __file__,
+                                     lbann_args=[m_lbann_args],
+                                     procs_per_node=procs_per_node,
+                                     nodes=num_nodes):
+    globals()[_test_func.__name__] = augment_test_func(_test_func)
diff --git a/bamboo/integration_tests/test_integration_pre_train_jag_wae.py b/bamboo/integration_tests/test_integration_pre_train_macc_wae.py
similarity index 91%
rename from bamboo/integration_tests/test_integration_pre_train_jag_wae.py
rename to bamboo/integration_tests/test_integration_pre_train_macc_wae.py
index b2a8c958dc7..5553ebcd12e 100644
--- a/bamboo/integration_tests/test_integration_pre_train_jag_wae.py
+++ b/bamboo/integration_tests/test_integration_pre_train_macc_wae.py
@@ -88,16 +88,16 @@ def setup_experiment(lbann):
 
     trainer = lbann.Trainer(mini_batch_size=mini_batch_size,
                             serialize_io=True)
-    import macc_trainable_models
+    import macc_models
     dump_models = 'dump_models'
     ltfb_batch_interval = 0
-    model = macc_trainable_models.construct_jag_wae_model(ydim=ydim,
-                                                          zdim=zdim,
-                                                          mcf=mcf,
-                                                          useCNN=useCNN,
-                                                          dump_models=dump_models,
-                                                          ltfb_batch_interval=ltfb_batch_interval,
-                                                          num_epochs=num_epochs)
+    model = macc_models.construct_jag_wae_model(ydim=ydim,
+                                                zdim=zdim,
+                                                mcf=mcf,
+                                                useCNN=useCNN,
+                                                dump_models=dump_models,
+                                                ltfb_batch_interval=ltfb_batch_interval,
+                                                num_epochs=num_epochs)
 
     # Setup optimizer
     opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)
diff --git a/bamboo/integration_tests/test_integration_train_macc_surrogate.py b/bamboo/integration_tests/test_integration_train_macc_surrogate.py
index 4cd441ed860..9f396883b1c 100644
--- a/bamboo/integration_tests/test_integration_train_macc_surrogate.py
+++ b/bamboo/integration_tests/test_integration_train_macc_surrogate.py
@@ -92,21 +92,21 @@ def setup_experiment(lbann):
 
     trainer = lbann.Trainer(mini_batch_size=mini_batch_size,
                             serialize_io=True)
-    import macc_trainable_models
+    import macc_models
     dump_models = 'dump_models'
     ltfb_batch_interval = 0
     pretrained_dir = ' '
-    model = macc_trainable_models.construct_macc_surrogate_model(xdim=xdim,
-                                                                 ydim=ydim,
-                                                                 zdim=zdim,
-                                                                 wae_mcf=wae_mcf,
-                                                                 surrogate_mcf=surrogate_mcf,
-                                                                 lambda_cyc=lambda_cyc,
-                                                                 useCNN=useCNN,
-                                                                 dump_models=dump_models,
-                                                                 pretrained_dir=pretrained_dir,
-                                                                 ltfb_batch_interval=ltfb_batch_interval,
-                                                                 num_epochs=num_epochs)
+    model = macc_models.construct_macc_surrogate_model(xdim=xdim,
+                                                       ydim=ydim,
+                                                       zdim=zdim,
+                                                       wae_mcf=wae_mcf,
+                                                       surrogate_mcf=surrogate_mcf,
+                                                       lambda_cyc=lambda_cyc,
+                                                       useCNN=useCNN,
+                                                       dump_models=dump_models,
+                                                       pretrained_dir=pretrained_dir,
+                                                       ltfb_batch_interval=ltfb_batch_interval,
+                                                       num_epochs=num_epochs)
  
     # Setup optimizer
     opt = lbann.Adam(learn_rate=0.0001,beta1=0.9,beta2=0.99,eps=1e-8)

From 8874aae01f8ebf6eb39c11f23a8cda59eb9abe2b Mon Sep 17 00:00:00 2001
From: "Brian C. Van Essen" <vanessen1@llnl.gov>
Date: Fri, 12 Nov 2021 11:56:22 -0800
Subject: [PATCH 37/37] Renamed jag_trainable_models to jag_models.

---
 .../physics/ICF/{jag_trainable_models.py => jag_models.py} | 0
 applications/physics/ICF/train_jag_wae.py                  | 7 ++-----
 bamboo/integration_tests/test_integration_jag_wae.py       | 4 ++--
 3 files changed, 4 insertions(+), 7 deletions(-)
 rename applications/physics/ICF/{jag_trainable_models.py => jag_models.py} (100%)

diff --git a/applications/physics/ICF/jag_trainable_models.py b/applications/physics/ICF/jag_models.py
similarity index 100%
rename from applications/physics/ICF/jag_trainable_models.py
rename to applications/physics/ICF/jag_models.py
diff --git a/applications/physics/ICF/train_jag_wae.py b/applications/physics/ICF/train_jag_wae.py
index 4469cb1241f..1c31f85d6f2 100644
--- a/applications/physics/ICF/train_jag_wae.py
+++ b/applications/physics/ICF/train_jag_wae.py
@@ -1,4 +1,4 @@
-import jag_trainable_models
+import jag_models
 from os.path import abspath, dirname, join
 import google.protobuf.text_format as txtf
 
@@ -21,8 +21,7 @@
     mini_batch_size = 128
     trainer = lbann.Trainer(mini_batch_size=mini_batch_size,
                             serialize_io=True)
-#    model = construct_model()
-    model = jag_trainable_models.construct_jag_wae_model(y_dim=y_dim,
+    model = jag_models.construct_jag_wae_model(y_dim=y_dim,
                                                z_dim=z_dim,
                                                num_epochs=num_epochs)
     # Setup optimizer
@@ -37,8 +36,6 @@
                        scheduler='slurm',
                        nodes=1,
                        procs_per_node=1,
-                       lbann_args=[f'--data_reader_percent=0.1'],
                        time_limit=360,
-#                       setup_only=True,
                        job_name='jag_wae')
     print(status)
diff --git a/bamboo/integration_tests/test_integration_jag_wae.py b/bamboo/integration_tests/test_integration_jag_wae.py
index bdf43e5ec2a..babe7576733 100644
--- a/bamboo/integration_tests/test_integration_jag_wae.py
+++ b/bamboo/integration_tests/test_integration_jag_wae.py
@@ -86,10 +86,10 @@ def setup_experiment(lbann):
 
     trainer = lbann.Trainer(mini_batch_size=mini_batch_size,
                             serialize_io=True)
-    import jag_trainable_models
+    import jag_models
     dump_models = 'dump_models'
     ltfb_batch_interval = 0
-    model = jag_trainable_models.construct_jag_wae_model(y_dim=y_dim,
+    model = jag_models.construct_jag_wae_model(y_dim=y_dim,
                                                z_dim=z_dim,
                                                num_epochs=num_epochs)