issue #27: Added BLSTM based stack

guilhermevarela · Nov 12, 2018 · dcf0508 · dcf0508
1 parent 0d463ce
commit dcf0508
Show file tree

Hide file tree

Showing 5 changed files with 33 additions and 22 deletions.
diff --git a/models/agents.py b/models/agents.py
@@ -107,7 +107,7 @@ def __init__(self, input_labels=FEATURE_LABELS, target_labels=TARGET_LABELS,
                  hidden_layers=HIDDEN_LAYERS, embeddings_model='wan50',
                  embeddings_trainable=False, epochs=100, lr=5 * 1e-3,
                  batch_size=250, version='1.0', rec_unit='BasicLSTM',
-                 recon_depth=-1, lang='pt', **kwargs):
+                 recon_depth=-1, lang='pt', stack='DB', **kwargs):
         '''Defines Dataflow graph
 
         Builds a Rnn tensorflow graph
@@ -170,7 +170,7 @@ def __init__(self, input_labels=FEATURE_LABELS, target_labels=TARGET_LABELS,
                 target_dir,
                 input_labels=input_labels, lr=lr,
                 hidden_layers=hidden_layers, ctx_p=ctx_p,
-                target_labels=target_labels, kfold=25,
+                target_labels=target_labels, stack=stack,
                 embeddings_trainable=False,
                 embeddings_model=embeddings_model, rec_unit=rec_unit,
                 epochs=epochs, chunks=chunks, recon_depth=recon_depth,
@@ -286,7 +286,8 @@ def __init__(self, input_labels=FEATURE_LABELS, target_labels=TARGET_LABELS,
         # The Labeler instantiation will build the architecture
         targets_size = [cnf_dict[lbl]['dims'] for lbl in target_labels]
         kwargs = {'learning_rate': lr, 'hidden_size': hidden_layers,
-                  'targets_size': targets_size, 'rec_unit': rec_unit}
+                  'targets_size': targets_size, 'rec_unit': rec_unit,
+                  'stack': stack}
 
         if self.single_task:
             self.rnn = Labeler(self.X, self.T, self.L, **kwargs)
@@ -408,20 +409,20 @@ def fit(self):
                 total_loss += loss
                 total_error += error
 
-                if (step) % 25 == 0:
+                if (step) % 1000 == 0:
 
                     f1_train = self._evaluate_propositions(train_dict, 'train')
 
 
                     batch_end = time.time()
                     print('Iter={:5d}'.format(step),
                           '\tepochs {:5d}'.format(epochs),
-                          '\tavg. cost {:.6f}'.format(total_loss / 25 ),
-                          '\tavg. error {:.6f}'.format(total_error / 25 ),
-                          '\tavg. batch time {:.3f} s'.format((batch_end - batch_start) / 25 ),
+                          '\tavg. cost {:.6f}'.format(total_loss / 1000 ),
+                          '\tavg. error {:.6f}'.format(total_error / 1000 ),
+                          '\tavg. batch time {:.3f} s'.format((batch_end - batch_start) / 1000 ),
                           '\tf1-train {:.6f}'.format(f1_train))
 
-                    eps = float(total_error) / 25
+                    eps = float(total_error) / 1000
                     total_loss = 0.0
                     total_error = 0.0
                     batch_start = batch_end

diff --git a/models/labelers.py b/models/labelers.py
@@ -9,7 +9,7 @@
 import tensorflow as tf
 
 from models.lib.properties import delegate_property, lazy_property
-from models.propagators import InterleavedPropagator
+from models.propagators import get_propagator
 
 from models.predictors import CRFPredictor
 
@@ -42,7 +42,7 @@ def __new__(meta, name, base, body):
 class Labeler(object, metaclass=LabelerMeta):
     def __init__(self, X, T, L,
                  learning_rate=5 * 1e-3, hidden_size=[32, 32], targets_size=[60],
-                 rec_unit='BasicLSTM'):
+                 rec_unit='BasicLSTM', stack='DB'):
         '''Sets the computation graph parameters
 
         Responsible for building computation graph
@@ -83,7 +83,8 @@ def __init__(self, X, T, L,
         self.hidden_size = hidden_size
         self.targets_size = targets_size
 
-        self.propagator = InterleavedPropagator(X, L, hidden_size, rec_unit=rec_unit)
+        propagator_cls = get_propagator(stack)
+        self.propagator = propagator_cls(X, L, hidden_size, rec_unit=rec_unit)
         self.predictor = CRFPredictor(self.propagator.propagate, T, L)
 
         self.propagate

diff --git a/models/propagators.py b/models/propagators.py
@@ -33,6 +33,15 @@ def get_unit(sz, rec_unit='BasicLSTM'):
                                            state_is_tuple=True)
     return rnn_cell
 
+def get_propagator(stack='DB'):
+    if stack == 'DB':
+        return InterleavedPropagator
+
+    if stack == 'BI':
+        return BiPropagator
+
+    raise ValueError(f'stack must be in (`BI`, `DB`) got {stack}')
+
 
 class PropagatorMeta(type):
     '''This is a metaclass -- enforces method definition
@@ -140,7 +149,7 @@ class InterleavedPropagator(BasePropagator, metaclass=PropagatorMeta):
     #         self.propagate
     def __init__(self, V, L, hidden_layers, rec_unit='BasicLSTMCell', scope_label=''):
         self.scope_id = f'DB{rec_unit}_{scope_label}'
-        super(BasePropagator, self).__init__(V, L, hidden_layers, rec_unit=rec_unit, scope_label=scope_label)
+        super(InterleavedPropagator, self).__init__(V, L, hidden_layers, rec_unit=rec_unit, scope_label=scope_label)
 
     @lazy_property
     def propagate(self):
@@ -244,7 +253,7 @@ class BiPropagator(BasePropagator, metaclass=PropagatorMeta):
 
     def __init__(self, V, L, hidden_layers, rec_unit='BasicLSTMCell', scope_label=''):
         self.scope_id = f'Bi{rec_unit}_{scope_label}'
-        super(BasePropagator, self).__init__(V, L, hidden_layers, rec_unit=rec_unit, scope_label=scope_label)
+        super(BiPropagator, self).__init__(V, L, hidden_layers, rec_unit=rec_unit, scope_label=scope_label)
 
     @lazy_property
     def propagate(self):
@@ -263,18 +272,16 @@ def propagate(self):
                 * target_sz_list -- outputs dimension
         '''
 
-        inputs = self.X
+        inputs = self.V
         for i, h in enumerate(self.hidden_layers):
             with tf.variable_scope(f'h{i}'):
                 fw = get_unit(h, rec_unit=self.rec_unit)
                 bw = get_unit(h, rec_unit=self.rec_unit)
-                outputs, _ = tf.nn.bidirectional_dynamic_rnn(
-                    fw,
-                    bw,
-                    inputs,
+                outputs, states = tf.nn.bidirectional_dynamic_rnn(
+                    fw, bw, inputs,
                     sequence_length=self.L,
-                    swap_memory=False,
-                    time_major=False
+                    dtype=tf.float32,
+                    time_major=False,
                 )
                 inputs = tf.concat(outputs, axis=2)
 

diff --git a/srl.py b/srl.py
@@ -125,6 +125,7 @@
     batch_size = args.batch_size
     rec_unit = args.rec_unit
     recon_depth = int(args.recon_depth)
+    stack = args.stack
 
     ctx_p = args.ctx_p
     if ctx_p > 1:
@@ -164,7 +165,7 @@
                 hidden_layers=args.depth, embeddings_model=embs_model,
                 epochs=epochs, rec_unit=rec_unit, batch_size=args.batch_size,
                 version=version, lr=learning_rate, recon_depth=recon_depth,
-                lang=lang)
+                lang=lang, stack=stack)
 
         agent.fit()
 

diff --git a/utils/snapshots.py b/utils/snapshots.py
@@ -103,7 +103,8 @@ def snapshot_persist(target_dir,  **kwargs):
             'hidden_layers', 'embeddings_model',
             'embeddings_trainable', 'epochs',
             'lr', 'batch_size', 'kfold', 'version',
-            'rec_unit', 'chunks', 'recon_depth', 'lang'}
+            'rec_unit', 'chunks', 'recon_depth', 'lang',
+            'stack'}
 
     # Clear exclusive parameters
     if 'kfold' in kwargs: