diff --git a/contrib/swig/CMakeLists.txt b/contrib/swig/CMakeLists.txt
index c921c3391..1cb130408 100644
--- a/contrib/swig/CMakeLists.txt
+++ b/contrib/swig/CMakeLists.txt
@@ -49,6 +49,7 @@ add_jar(
   "${CMAKE_SWIG_OUTDIR}/AdamTrainer.java"
   "${CMAKE_SWIG_OUTDIR}/ComputationGraph.java"
   "${CMAKE_SWIG_OUTDIR}/CoupledLSTMBuilder.java"
+  "${CMAKE_SWIG_OUTDIR}/CompactVanillaLSTMBuilder.java"
   "${CMAKE_SWIG_OUTDIR}/CyclicalSGDTrainer.java"
   "${CMAKE_SWIG_OUTDIR}/Device.java"
   "${CMAKE_SWIG_OUTDIR}/DeviceMempool.java"
diff --git a/contrib/swig/dynet_swig.i b/contrib/swig/dynet_swig.i
index ca75a2849..40f0a2c74 100644
--- a/contrib/swig/dynet_swig.i
+++ b/contrib/swig/dynet_swig.i
@@ -798,7 +798,7 @@ struct AdamTrainer : public Trainer {
 %nodefaultctor RNNBuilder;
 struct RNNBuilder {
   RNNPointer state() const;
-  void new_graph(ComputationGraph& cg);
+  void new_graph(ComputationGraph& cg, bool update = true);
   void start_new_sequence(const std::vector<Expression>& h_0 = {});
   Expression set_h(const RNNPointer& prev, const std::vector<Expression>& h_new = {});
   Expression set_s(const RNNPointer& prev, const std::vector<Expression>& s_new = {});
@@ -818,6 +818,8 @@ struct RNNBuilder {
 
   virtual unsigned num_h0_components() const = 0;
   virtual void copy(const RNNBuilder& params) = 0;
+
+  virtual ParameterCollection & get_parameter_collection() = 0;
 };
 
 struct SimpleRNNBuilder : public RNNBuilder {
@@ -840,6 +842,8 @@ struct SimpleRNNBuilder : public RNNBuilder {
 
   void copy(const RNNBuilder& params) override;
   unsigned num_h0_components() const override;
+
+  ParameterCollection & get_parameter_collection() override;
 };
 
 ////////////////////////////////////
@@ -865,6 +869,11 @@ struct CoupledLSTMBuilder : public RNNBuilder {
   void set_dropout(float d);
   void set_dropout(float d, float d_h, float d_c);
   void disable_dropout();
+  void set_dropout_masks(unsigned batch_size = 1);
+
+  ParameterCollection & get_parameter_collection() override;
+
+  ParameterCollection local_model;
 
   // first index is layer, then ...
   std::vector<std::vector<Parameter>> params;
@@ -872,19 +881,24 @@ struct CoupledLSTMBuilder : public RNNBuilder {
   // first index is layer, then ...
   std::vector<std::vector<Expression>> param_vars;
 
-  // first index is time, second is layer
-  std::vector<std::vector<Expression>> h, c;
-
   // first index is layer, then ...
   // masks for Gal dropout
   std::vector<std::vector<Expression>> masks;
 
+  // first index is time, second is layer
+  std::vector<std::vector<Expression>> h, c;
+
   // initial values of h and c at each layer
   // - both default to zero matrix input
   bool has_initial_state; // if this is false, treat h0 and c0 as 0
   std::vector<Expression> h0;
   std::vector<Expression> c0;
   unsigned layers;
+  unsigned input_dim = 0;
+  unsigned hid = 0;
+  bool dropout_masks_valid;
+
+  float dropout_rate_h = 0.f, dropout_rate_c = 0.f;
 };
 
 struct VanillaLSTMBuilder : public RNNBuilder {
@@ -908,6 +922,11 @@ struct VanillaLSTMBuilder : public RNNBuilder {
   void set_dropout(float d);
   void set_dropout(float d, float d_r);
   void disable_dropout();
+  void set_dropout_masks(unsigned batch_size = 1);
+
+  ParameterCollection & get_parameter_collection() override;
+
+  ParameterCollection local_model;
 
   // first index is layer, then ...
   std::vector<std::vector<Parameter>> params;
@@ -919,6 +938,9 @@ struct VanillaLSTMBuilder : public RNNBuilder {
   // first index is layer, then ...
   std::vector<std::vector<Expression>> ln_param_vars;
 
+  // first index is layer, then ...
+  std::vector<std::vector<Expression>> masks;
+
   // first index is time, second is layer
   std::vector<std::vector<Expression>> h, c;
 
@@ -928,11 +950,65 @@ struct VanillaLSTMBuilder : public RNNBuilder {
   std::vector<Expression> h0;
   std::vector<Expression> c0;
   unsigned layers;
-  unsigned hid;
+  unsigned input_dim, hid;
+  float dropout_rate_h;
+  bool ln_lstm;
+  bool dropout_masks_valid;
 };
 
 typedef VanillaLSTMBuilder LSTMBuilder;
 
+struct CompactVanillaLSTMBuilder : public RNNBuilder {
+  CompactVanillaLSTMBuilder();
+  explicit CompactVanillaLSTMBuilder(unsigned layers,
+                                     unsigned input_dim,
+                                     unsigned hidden_dim,
+                                     ParameterCollection& model);
+
+  Expression back() const override;
+  std::vector<Expression> final_h() const override;
+  std::vector<Expression> final_s() const override;
+  unsigned num_h0_components() const override;
+
+  std::vector<Expression> get_h(RNNPointer i) const override;
+  std::vector<Expression> get_s(RNNPointer i) const override;
+
+  void copy(const RNNBuilder & params) override;
+
+  void set_dropout(float d);
+  void set_dropout(float d, float d_r);
+  void disable_dropout();
+  void set_dropout_masks(unsigned batch_size = 1);
+  void set_weightnoise(float std);
+
+  ParameterCollection & get_parameter_collection() override;
+
+  ParameterCollection local_model;
+
+  // first index is layer, then ...
+  std::vector<std::vector<Parameter>> params;
+
+  // first index is layer, then ...
+  std::vector<std::vector<Expression>> param_vars;
+
+  // first index is layer, then ...
+  std::vector<std::vector<Expression>> masks;
+
+  // first index is time, second is layer
+  std::vector<std::vector<Expression>> h, c;
+
+  // initial values of h and c at each layer
+  // - both default to zero matrix input
+  bool has_initial_state; // if this is false, treat h0 and c0 as 0
+  std::vector<Expression> h0;
+  std::vector<Expression> c0;
+  unsigned layers;
+  unsigned input_dim, hid;
+  float dropout_rate_h;
+  float weightnoise_std;
+  bool dropout_masks_valid;
+};
+
 ///////////////////////////////////
 // declarations from dynet/gru.h //
 ///////////////////////////////////
@@ -950,6 +1026,7 @@ struct GRUBuilder : public RNNBuilder {
   std::vector<Expression> get_s(RNNPointer i) const override;
   unsigned num_h0_components() const override;
   void copy(const RNNBuilder & params) override;
+  ParameterCollection & get_parameter_collection() override;
 };
 
 
@@ -974,6 +1051,10 @@ struct FastLSTMBuilder : public RNNBuilder {
 
   void copy(const RNNBuilder & params) override;
 
+  ParameterCollection & get_parameter_collection() override;
+
+  ParameterCollection local_model;
+
   std::vector<std::vector<Parameter>> params;
   std::vector<std::vector<Expression>> param_vars;
 
diff --git a/contrib/swig/src/main/scala/edu/cmu/dynet/LSTMBuilder.scala b/contrib/swig/src/main/scala/edu/cmu/dynet/LSTMBuilder.scala
index 78191f954..3d144a783 100644
--- a/contrib/swig/src/main/scala/edu/cmu/dynet/LSTMBuilder.scala
+++ b/contrib/swig/src/main/scala/edu/cmu/dynet/LSTMBuilder.scala
@@ -10,6 +10,8 @@ class VanillaLstmBuilder private[dynet](private[dynet] val builder: internal.Van
   }
 
   def setDropout(d: Float, dR: Float): Unit = builder.set_dropout(d, dR)
+
+  def setDropoutMasks(batchSize: Long): Unit = builder.set_dropout_masks(batchSize)
 }
 
 // TODO(joelgrus): get the typedef to work
@@ -23,6 +25,8 @@ class LstmBuilder private[dynet](private[dynet] val builder: internal.VanillaLST
   }
 
   def setDropout(d: Float, dR: Float): Unit = builder.set_dropout(d, dR)
+
+  def setDropoutMasks(batchSize: Long): Unit = builder.set_dropout_masks(batchSize)
 }
 
 /** Builder method for creating LSTMs, as in the C++ code. For its public methods see
@@ -41,4 +45,20 @@ class CoupledLstmBuilder private[dynet](private[dynet] val builder: internal.Cou
   }
 
   def setDropout(d: Float, dH: Float, dC: Float): Unit = builder.set_dropout(d, dH, dC)
+
+  def setDropoutMasks(batchSize: Long): Unit = builder.set_dropout_masks(batchSize)
+}
+
+class CompactVanillaLSTMBuilder private[dynet](private[dynet] val builder: internal.CompactVanillaLSTMBuilder)
+  extends RnnBuilder(builder) {
+
+  def this() { this(new internal.CompactVanillaLSTMBuilder()) }
+
+  def this(layers: Long, inputDim: Long, hiddenDim: Long, model: ParameterCollection) {
+    this(new internal.CompactVanillaLSTMBuilder(layers, inputDim, hiddenDim, model.model))
+  }
+
+  def setDropout(d: Float, dR: Float): Unit = builder.set_dropout(d, dR)
+
+  def setDropoutMasks(batchSize: Long): Unit = builder.set_dropout_masks(batchSize)
 }
diff --git a/contrib/swig/src/main/scala/edu/cmu/dynet/RNNBuilder.scala b/contrib/swig/src/main/scala/edu/cmu/dynet/RNNBuilder.scala
index ff5f6d75d..75c562ef8 100644
--- a/contrib/swig/src/main/scala/edu/cmu/dynet/RNNBuilder.scala
+++ b/contrib/swig/src/main/scala/edu/cmu/dynet/RNNBuilder.scala
@@ -5,9 +5,9 @@ abstract class RnnBuilder(private[dynet] val _builder: internal.RNNBuilder) {
   var version: Long = ComputationGraph.version
 
   def state(): Int = _builder.state
-  def newGraph(): Unit = {
+  def newGraph(update: Boolean = true): Unit = {
     version = ComputationGraph.version
-    _builder.new_graph(ComputationGraph.cg)
+    _builder.new_graph(ComputationGraph.cg, update)
   }
 
   def startNewSequence(ev: ExpressionVector): Unit = _builder.start_new_sequence(ev.vector)
diff --git a/contrib/swig/src/main/scala/edu/cmu/dynet/examples/RnnLanguageModelBatch.scala b/contrib/swig/src/main/scala/edu/cmu/dynet/examples/RnnLanguageModelBatch.scala
index e3a129504..349cbb3af 100644
--- a/contrib/swig/src/main/scala/edu/cmu/dynet/examples/RnnLanguageModelBatch.scala
+++ b/contrib/swig/src/main/scala/edu/cmu/dynet/examples/RnnLanguageModelBatch.scala
@@ -130,8 +130,8 @@ object RnnLanguageModelBatch {
 
   val userDir = System.getProperty("user.dir")
 
-  val TRAIN_FILE = Paths.get(userDir, "../examples/cpp/example-data/train-hsm.txt").toString
-  val DEV_FILE = Paths.get(userDir, "../examples/cpp/example-data/dev-hsm.txt").toString
+  val TRAIN_FILE = Paths.get(userDir, "../../examples/cpp/example-data/train-hsm.txt").toString
+  val DEV_FILE = Paths.get(userDir, "../../examples/cpp/example-data/dev-hsm.txt").toString
 
   def main(args: Array[String]) {
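A usage note for reviewers, not part of the patch: the sketch below exercises the Scala surface this change adds — the `update` flag on `newGraph`, the `setDropoutMasks` wrapper, and the new `CompactVanillaLSTMBuilder`. The object name and all dimensions are made up for illustration, and the surrounding calls (`Initialize.initialize`, `ComputationGraph.renew`, `Expression.randomNormal`, `Dim`, the no-arg `startNewSequence`) are assumed to behave as elsewhere in the edu.cmu.dynet bindings.

    import edu.cmu.dynet._

    object CompactLstmSketch {
      def main(args: Array[String]): Unit = {
        Initialize.initialize()
        val model = new ParameterCollection

        // Arbitrary sizes: 1 layer, 16-dim inputs, 32-dim hidden state.
        val builder = new CompactVanillaLSTMBuilder(1, 16, 32, model)
        builder.setDropout(0.3f, 0.3f)

        ComputationGraph.renew()
        // Passing update = false would treat the LSTM parameters as
        // constants in this graph (the new new_graph(cg, update) flag).
        builder.newGraph(update = true)
        builder.startNewSequence()
        // Gal-style dropout masks are sampled once per sequence, so they
        // are (re)drawn here, after startNewSequence, for batch size 1.
        builder.setDropoutMasks(batchSize = 1)

        // Feed a short sequence of random inputs through the LSTM.
        val states = (1 to 5).map { _ =>
          builder.addInput(Expression.randomNormal(Dim(16)))
        }
        println(s"processed ${states.size} steps")
      }
    }

Note that `get_parameter_collection` and `set_weightnoise` are exposed here only at the SWIG level; Scala-side accessors for them would still need their own wrappers in RNNBuilder.scala and LSTMBuilder.scala.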