Create a model catalog in the library (OpenNMT#102)

TingxunShi · Apr 9, 2018 · 2158e29
1 parent 791da62
commit 2158e29
Show file tree

Hide file tree

Showing 21 changed files with 413 additions and 317 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,7 @@ OpenNMT-tf follows [semantic versioning 2.0.0](https://semver.org/). The API cov
 ### New features
 
 * Update the OpenNMT tokenizer to 1.3.0 and use its Python package instead of requiring a manual compilation (Linux only)
+* Include a catalog of models in the library package and allow model selection with the `--model_type` command line option
 
 ### Fixes and improvements
 

diff --git a/README.md b/README.md
@@ -41,14 +41,14 @@ pip install OpenNMT-tf
 
 A minimal OpenNMT-tf run consists of 3 elements:
 
-* a **run** type: `train_and_eval`, `train`, `eval`, `infer`, or `export`
-* a Python file describing the **model**
-* a YAML file describing the **parameters**
+* the **run** type: `train_and_eval`, `train`, `eval`, `infer`, or `export`
+* the **model** type
+* the **parameters** described in a YAML file
 
 that are passed to the main script:
 
 ```
-onmt-main <run_type> --model <model_file.py> --config <config_file.yml>
+onmt-main <run_type> --model_type <model> --config <config_file.yml>
 ```
 
 * For more information about configuration files, see the [documentation](http://opennmt.net/OpenNMT-tf/configuration.html).
@@ -75,7 +75,7 @@ onmt-build-vocab --size 50000 --save_vocab data/toy-ende/tgt-vocab.txt data/toy-
 3\. Train with preset parameters:
 
 ```
-onmt-main train_and_eval --model config/models/nmt_small.py --config config/opennmt-defaults.yml config/data/toy-ende.yml
+onmt-main train_and_eval --model_type NMTSmall --config config/opennmt-defaults.yml config/data/toy-ende.yml
 ```
 
 4\. Translate a test file with the latest checkpoint:

diff --git a/config/models/character_seq2seq.py b/config/models/character_seq2seq.py
@@ -1,35 +1,3 @@
-"""Defines a character-based sequence-to-sequence model.
+from opennmt.models.catalog import CharacterSeq2Seq
 
-Character vocabularies can be built with:
-
-python -m bin.build_vocab --tokenizer CharacterTokenizer ...
-"""
-
-import tensorflow as tf
-import opennmt as onmt
-
-def model():
-  return onmt.models.SequenceToSequence(
-      source_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="source_chars_vocabulary",
-          embedding_size=30,
-          tokenizer=onmt.tokenizers.CharacterTokenizer()),
-      target_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="target_chars_vocabulary",
-          embedding_size=30,
-          tokenizer=onmt.tokenizers.CharacterTokenizer()),
-      encoder=onmt.encoders.BidirectionalRNNEncoder(
-          num_layers=4,
-          num_units=512,
-          reducer=onmt.layers.ConcatReducer(),
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False),
-      decoder=onmt.decoders.AttentionalRNNDecoder(
-          num_layers=4,
-          num_units=512,
-          bridge=onmt.layers.CopyBridge(),
-          attention_mechanism_class=tf.contrib.seq2seq.LuongAttention,
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False))
+model = CharacterSeq2Seq
diff --git a/config/models/listen_attend_spell.py b/config/models/listen_attend_spell.py
@@ -1,27 +1,3 @@
-"""Defines a model similar to the "Listen, Attend and Spell" model described
-in https://arxiv.org/abs/1508.01211.
-"""
+from opennmt.models.catalog import ListenAttendSpell
 
-import tensorflow as tf
-import opennmt as onmt
-
-def model():
-  return onmt.models.SequenceToSequence(
-      source_inputter=onmt.inputters.SequenceRecordInputter(),
-      target_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="target_vocabulary",
-          embedding_size=50),
-      encoder=onmt.encoders.PyramidalRNNEncoder(
-          num_layers=3,
-          num_units=512,
-          reduction_factor=2,
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3),
-      decoder=onmt.decoders.MultiAttentionalRNNDecoder(
-          num_layers=3,
-          num_units=512,
-          attention_layers=[0],
-          attention_mechanism_class=tf.contrib.seq2seq.LuongMonotonicAttention,
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False))
+model = ListenAttendSpell
diff --git a/config/models/multi_features_nmt.py b/config/models/multi_features_nmt.py
@@ -1,40 +1,3 @@
-"""Defines a sequence to sequence model with multiple input features. For
-example, this could be words, parts of speech, and lemmas that are embedded in
-parallel and concatenated into a single input embedding. The features are
-separate data files with separate vocabularies.
-"""
+from opennmt.models.catalog import MultiFeaturesNMT
 
-import tensorflow as tf
-import opennmt as onmt
-
-def model():
-  return onmt.models.SequenceToSequence(
-      source_inputter=onmt.inputters.ParallelInputter([
-          onmt.inputters.WordEmbedder(
-              vocabulary_file_key="source_words_vocabulary",
-              embedding_size=512),
-          onmt.inputters.WordEmbedder(
-              vocabulary_file_key="feature_1_vocabulary",
-              embedding_size=16),
-          onmt.inputters.WordEmbedder(
-              vocabulary_file_key="feature_2_vocabulary",
-              embedding_size=64)],
-          reducer=onmt.layers.ConcatReducer()),
-      target_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="target_words_vocabulary",
-          embedding_size=512),
-      encoder=onmt.encoders.BidirectionalRNNEncoder(
-          num_layers=4,
-          num_units=512,
-          reducer=onmt.layers.ConcatReducer(),
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False),
-      decoder=onmt.decoders.AttentionalRNNDecoder(
-          num_layers=4,
-          num_units=512,
-          bridge=onmt.layers.CopyBridge(),
-          attention_mechanism_class=tf.contrib.seq2seq.LuongAttention,
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False))
+model = MultiFeaturesNMT
diff --git a/config/models/multi_source_nmt.py b/config/models/multi_source_nmt.py
@@ -1,44 +1,3 @@
-"""Defines a multi source sequence to sequence model. Source sequences are read
-from 2 files, encoded separately, and the encoder outputs are concatenated in
-time.
-"""
+from opennmt.models.catalog import MultiSourceNMT
 
-import tensorflow as tf
-import opennmt as onmt
-
-def model():
-  return onmt.models.SequenceToSequence(
-      source_inputter=onmt.inputters.ParallelInputter([
-          onmt.inputters.WordEmbedder(
-              vocabulary_file_key="source_vocabulary_1",
-              embedding_size=512),
-          onmt.inputters.WordEmbedder(
-              vocabulary_file_key="source_vocabulary_2",
-              embedding_size=512)]),
-      target_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="target_vocabulary",
-          embedding_size=512),
-      encoder=onmt.encoders.ParallelEncoder([
-          onmt.encoders.BidirectionalRNNEncoder(
-              num_layers=2,
-              num_units=512,
-              reducer=onmt.layers.ConcatReducer(),
-              cell_class=tf.contrib.rnn.LSTMCell,
-              dropout=0.3,
-              residual_connections=False),
-          onmt.encoders.BidirectionalRNNEncoder(
-              num_layers=2,
-              num_units=512,
-              reducer=onmt.layers.ConcatReducer(),
-              cell_class=tf.contrib.rnn.LSTMCell,
-              dropout=0.3,
-              residual_connections=False)],
-          outputs_reducer=onmt.layers.ConcatReducer(axis=1)),
-      decoder=onmt.decoders.AttentionalRNNDecoder(
-          num_layers=4,
-          num_units=512,
-          bridge=onmt.layers.DenseBridge(),
-          attention_mechanism_class=tf.contrib.seq2seq.LuongAttention,
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False))
+model = MultiSourceNMT
diff --git a/config/models/nmt_medium.py b/config/models/nmt_medium.py
@@ -1,28 +1,3 @@
-"""Defines a medium-sized bidirectional LSTM encoder-decoder model."""
+from opennmt.models.catalog import NMTMedium
 
-import tensorflow as tf
-import opennmt as onmt
-
-def model():
-  return onmt.models.SequenceToSequence(
-      source_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="source_words_vocabulary",
-          embedding_size=512),
-      target_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="target_words_vocabulary",
-          embedding_size=512),
-      encoder=onmt.encoders.BidirectionalRNNEncoder(
-          num_layers=4,
-          num_units=512,
-          reducer=onmt.layers.ConcatReducer(),
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False),
-      decoder=onmt.decoders.AttentionalRNNDecoder(
-          num_layers=4,
-          num_units=512,
-          bridge=onmt.layers.CopyBridge(),
-          attention_mechanism_class=tf.contrib.seq2seq.LuongAttention,
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False))
+model = NMTMedium
diff --git a/config/models/nmt_medium_fp16.py b/config/models/nmt_medium_fp16.py
@@ -1,32 +1,3 @@
-"""Defines a medium-sized bidirectional LSTM encoder-decoder model with
-experimental FP16 data type.
-"""
+from opennmt.models.catalog import NMTMediumFP16
 
-import tensorflow as tf
-import opennmt as onmt
-
-def model():
-  return onmt.models.SequenceToSequence(
-      source_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="source_words_vocabulary",
-          embedding_size=512,
-          dtype=tf.float16),
-      target_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="target_words_vocabulary",
-          embedding_size=512,
-          dtype=tf.float16),
-      encoder=onmt.encoders.BidirectionalRNNEncoder(
-          num_layers=4,
-          num_units=512,
-          reducer=onmt.layers.ConcatReducer(),
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False),
-      decoder=onmt.decoders.AttentionalRNNDecoder(
-          num_layers=4,
-          num_units=512,
-          bridge=onmt.layers.CopyBridge(),
-          attention_mechanism_class=tf.contrib.seq2seq.LuongAttention,
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False))
+model = NMTMediumFP16
diff --git a/config/models/nmt_small.py b/config/models/nmt_small.py
@@ -1,27 +1,3 @@
-"""Defines a small unidirectional LSTM encoder-decoder model."""
+from opennmt.models.catalog import NMTSmall
 
-import tensorflow as tf
-import opennmt as onmt
-
-def model():
-  return onmt.models.SequenceToSequence(
-      source_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="source_words_vocabulary",
-          embedding_size=512),
-      target_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="target_words_vocabulary",
-          embedding_size=512),
-      encoder=onmt.encoders.UnidirectionalRNNEncoder(
-          num_layers=2,
-          num_units=512,
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False),
-      decoder=onmt.decoders.AttentionalRNNDecoder(
-          num_layers=2,
-          num_units=512,
-          bridge=onmt.layers.CopyBridge(),
-          attention_mechanism_class=tf.contrib.seq2seq.LuongAttention,
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.3,
-          residual_connections=False))
+model = NMTSmall
diff --git a/config/models/seq_tagger.py b/config/models/seq_tagger.py
@@ -1,30 +1,3 @@
-"""Defines a bidirectional LSTM-CNNs-CRF as described in https://arxiv.org/abs/1603.01354."""
+from opennmt.models.catalog import SeqTagger
 
-import tensorflow as tf
-import opennmt as onmt
-
-def model():
-  return onmt.models.SequenceTagger(
-      inputter=onmt.inputters.MixedInputter([
-          onmt.inputters.WordEmbedder(
-              vocabulary_file_key="words_vocabulary",
-              embedding_size=None,
-              embedding_file_key="words_embedding",
-              trainable=True),
-          onmt.inputters.CharConvEmbedder(
-              vocabulary_file_key="chars_vocabulary",
-              embedding_size=30,
-              num_outputs=30,
-              kernel_size=3,
-              stride=1,
-              dropout=0.5)],
-          dropout=0.5),
-      encoder=onmt.encoders.BidirectionalRNNEncoder(
-          num_layers=1,
-          num_units=400,
-          reducer=onmt.layers.ConcatReducer(),
-          cell_class=tf.contrib.rnn.LSTMCell,
-          dropout=0.5,
-          residual_connections=False),
-      labels_vocabulary_file_key="tags_vocabulary",
-      crf_decoding=True)
+model = SeqTagger
diff --git a/config/models/transformer.py b/config/models/transformer.py
@@ -1,20 +1,3 @@
-"""Defines a Transformer model as described in https://arxiv.org/abs/1706.03762."""
+from opennmt.models.catalog import Transformer
 
-import tensorflow as tf
-import opennmt as onmt
-
-def model():
-  return onmt.models.Transformer(
-      source_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="source_words_vocabulary",
-          embedding_size=512),
-      target_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="target_words_vocabulary",
-          embedding_size=512),
-      num_layers=6,
-      num_units=512,
-      num_heads=8,
-      ffn_inner_dim=2048,
-      dropout=0.1,
-      attention_dropout=0.1,
-      relu_dropout=0.1)
+model = Transformer
diff --git a/config/models/transformer_fp16.py b/config/models/transformer_fp16.py
@@ -1,22 +1,3 @@
-"""Defines a Transformer model with experimental FP16 data type."""
+from opennmt.models.catalog import TransformerFP16
 
-import tensorflow as tf
-import opennmt as onmt
-
-def model():
-  return onmt.models.Transformer(
-      source_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="source_words_vocabulary",
-          embedding_size=512,
-          dtype=tf.float16),
-      target_inputter=onmt.inputters.WordEmbedder(
-          vocabulary_file_key="target_words_vocabulary",
-          embedding_size=512,
-          dtype=tf.float16),
-      num_layers=6,
-      num_units=512,
-      num_heads=8,
-      ffn_inner_dim=2048,
-      dropout=0.1,
-      attention_dropout=0.1,
-      relu_dropout=0.1)
+model = TransformerFP16
diff --git a/docs/configuration.md b/docs/configuration.md
@@ -2,9 +2,9 @@
 
 ## Model
 
-Models are defined from the code to allow a high level of modeling freedom. The user should provide a `opennmt.models.Model` instance using [available](package/opennmt.html) or user-defined modules.
+### Definition
 
-Some modules are defined to contain other modules and can be used to design complex architectures:
+Models are defined from the code to allow a high level of modeling freedom. They are `opennmt.models.Model` instances that use [available](package/opennmt.html) or user-defined modules. Some of these modules are defined to contain other modules and can be used to design complex architectures:
 
 * `opennmt.encoders.ParallelEncoder`
 * `opennmt.encoders.SequentialEncoder`
@@ -13,7 +13,14 @@ Some modules are defined to contain other modules and can be used to design comp
 
 For example, these container modules can be used to implement multi source inputs, multi modal training, mixed word/character embeddings, and arbitrarily complex encoder architectures (e.g. mixing convolution, RNN, self-attention, etc.).
 
-*See the template file `config/models/template.py` and predefined models in `config/models/`. Contributions to add more model configurations are welcome.*
+### Usage
+
+The user can either:
+
+* select a predefined model from the [catalog](package/opennmt.models.catalog.html) and use the `--model_type` command line option
+* **or** provide a custom configuration file that follows the template file `config/models/template.py` and use the `--model` command line option
+
+*See the predefined model definitions in the [catalog](_modules/opennmt/models/catalog.html). Contributions to add more default models are welcome.*
 
 ## Parameters