Commit
Merge branch 'feature/bilstm-propagator' into development
guilhermevarela committed Nov 12, 2018
2 parents 424b7d7 + dcf0508 commit 52b0d9e
Showing 19 changed files with 247,281 additions and 40 deletions.
17 changes: 9 additions & 8 deletions models/agents.py
@@ -107,7 +107,7 @@ def __init__(self, input_labels=FEATURE_LABELS, target_labels=TARGET_LABELS,
hidden_layers=HIDDEN_LAYERS, embeddings_model='wan50',
embeddings_trainable=False, epochs=100, lr=5 * 1e-3,
batch_size=250, version='1.0', rec_unit='BasicLSTM',
-                 recon_depth=-1, lang='pt', **kwargs):
+                 recon_depth=-1, lang='pt', stack='DB', **kwargs):
        '''Defines the dataflow graph

        Builds an RNN TensorFlow graph
@@ -170,7 +170,7 @@ def __init__(self, input_labels=FEATURE_LABELS, target_labels=TARGET_LABELS,
target_dir,
input_labels=input_labels, lr=lr,
hidden_layers=hidden_layers, ctx_p=ctx_p,
-            target_labels=target_labels, kfold=25,
+            target_labels=target_labels, stack=stack,
embeddings_trainable=False,
embeddings_model=embeddings_model, rec_unit=rec_unit,
epochs=epochs, chunks=chunks, recon_depth=recon_depth,
@@ -286,7 +286,8 @@ def __init__(self, input_labels=FEATURE_LABELS, target_labels=TARGET_LABELS,
        # The Labeler instantiation will build the architecture
targets_size = [cnf_dict[lbl]['dims'] for lbl in target_labels]
kwargs = {'learning_rate': lr, 'hidden_size': hidden_layers,
-                  'targets_size': targets_size, 'rec_unit': rec_unit}
+                  'targets_size': targets_size, 'rec_unit': rec_unit,
+                  'stack': stack}

if self.single_task:
self.rnn = Labeler(self.X, self.T, self.L, **kwargs)
@@ -408,20 +409,20 @@ def fit(self):
total_loss += loss
total_error += error

-                if (step) % 25 == 0:
+                if step % 1000 == 0:

f1_train = self._evaluate_propositions(train_dict, 'train')


batch_end = time.time()
print('Iter={:5d}'.format(step),
'\tepochs {:5d}'.format(epochs),
-                          '\tavg. cost {:.6f}'.format(total_loss / 25),
-                          '\tavg. error {:.6f}'.format(total_error / 25),
-                          '\tavg. batch time {:.3f} s'.format((batch_end - batch_start) / 25),
+                          '\tavg. cost {:.6f}'.format(total_loss / 1000),
+                          '\tavg. error {:.6f}'.format(total_error / 1000),
+                          '\tavg. batch time {:.3f} s'.format((batch_end - batch_start) / 1000),
'\tf1-train {:.6f}'.format(f1_train))

-                    eps = float(total_error) / 25
+                    eps = float(total_error) / 1000
total_loss = 0.0
total_error = 0.0
batch_start = batch_end
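The report interval now appears as the literal 1000 in several places (the modulus and four divisors). Below is a minimal, self-contained sketch of the same windowed-average logging pattern with the interval factored into one constant; LOG_EVERY and the stand-in loss values are hypothetical, not part of the commit:

import time

LOG_EVERY = 1000  # hypothetical constant; the diff inlines the literal 1000

total_loss, total_error = 0.0, 0.0
batch_start = time.time()
for step in range(1, 3001):
    loss, error = 1.0 / step, 0.5 / step  # stand-ins for one training batch
    total_loss += loss
    total_error += error
    if step % LOG_EVERY == 0:
        batch_end = time.time()
        print('Iter={:5d}'.format(step),
              '\tavg. cost {:.6f}'.format(total_loss / LOG_EVERY),
              '\tavg. error {:.6f}'.format(total_error / LOG_EVERY),
              '\tavg. batch time {:.3f} s'.format((batch_end - batch_start) / LOG_EVERY))
        total_loss, total_error = 0.0, 0.0
        batch_start = batch_end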
7 changes: 4 additions & 3 deletions models/labelers.py
@@ -9,7 +9,7 @@
import tensorflow as tf

from models.lib.properties import delegate_property, lazy_property
-from models.propagators import InterleavedPropagator
+from models.propagators import get_propagator

from models.predictors import CRFPredictor

@@ -42,7 +42,7 @@ def __new__(meta, name, base, body):
class Labeler(object, metaclass=LabelerMeta):
def __init__(self, X, T, L,
learning_rate=5 * 1e-3, hidden_size=[32, 32], targets_size=[60],
-                 rec_unit='BasicLSTM'):
+                 rec_unit='BasicLSTM', stack='DB'):
'''Sets the computation graph parameters
        Responsible for building the computation graph
@@ -83,7 +83,8 @@ def __init__(self, X, T, L,
self.hidden_size = hidden_size
self.targets_size = targets_size

-        self.propagator = InterleavedPropagator(X, L, hidden_size, rec_unit=rec_unit)
+        propagator_cls = get_propagator(stack)
+        self.propagator = propagator_cls(X, L, hidden_size, rec_unit=rec_unit)
self.predictor = CRFPredictor(self.propagator.propagate, T, L)

self.propagate
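A usage sketch for the new keyword, assuming TF 1.x placeholders; the shapes, dtypes, and sizes below are illustrative, not taken from the commit:

import tensorflow as tf
from models.labelers import Labeler

# Illustrative shapes: [batch, max_time, features] inputs,
# [batch, max_time, targets] targets, per-example true lengths.
X = tf.placeholder(tf.float32, shape=(None, None, 50), name='X')
T = tf.placeholder(tf.float32, shape=(None, None, 60), name='T')
L = tf.placeholder(tf.int32, shape=(None,), name='L')

# stack='DB' keeps the interleaved network; stack='BI' selects BiPropagator.
labeler = Labeler(X, T, L, hidden_size=[32, 32], targets_size=[60],
                  rec_unit='BasicLSTM', stack='BI')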
146 changes: 119 additions & 27 deletions models/propagators.py
@@ -7,28 +7,6 @@
import tensorflow as tf
from models.lib.properties import lazy_property


-class PropagatorMeta(type):
-    '''This is a metaclass -- enforces method definition
-    on the class body
-
-    Every Propagator must implement the following methods
-    * propagate - builds automatic features from the input layer
-
-    References:
-        https://docs.python.org/3/reference/datamodel.html#metaclasses
-        https://realpython.com/python-metaclasses/
-    '''
-    def __new__(meta, name, base, body):
-        propagator_methods = ('propagate',)
-
-        for pm in propagator_methods:
-            if pm not in body:
-                msg = 'Propagator must implement {:}'.format(pm)
-                raise TypeError(msg)
-
-        return super().__new__(meta, name, base, body)

def get_unit(sz, rec_unit='BasicLSTM'):
ru_types = ('BasicLSTM', 'GRU', 'LSTM', 'LSTMBlockCell')
if rec_unit not in ru_types:
@@ -47,15 +25,46 @@ def get_unit(sz, rec_unit='BasicLSTM'):
forget_bias=1.0,
state_is_tuple=True)

-    if rec_unit == 'LSTMBlockCell':  # Should run faster than BasicLSTM and LSTM
+    # Should run faster than BasicLSTM and LSTM
+    if rec_unit == 'LSTMBlockCell':
        # In TF 1.x, LSTMBlockCell lives in tf.contrib.rnn and takes
        # use_peephole (singular, no state_is_tuple); the tf.nn.rnn_cell
        # path would raise an AttributeError
        rnn_cell = tf.contrib.rnn.LSTMBlockCell(sz,
                                                use_peephole=True,
                                                forget_bias=1.0)
return rnn_cell

+def get_propagator(stack='DB'):
+    if stack == 'DB':
+        return InterleavedPropagator
+
+    if stack == 'BI':
+        return BiPropagator
+
+    raise ValueError(f'stack must be in (`BI`, `DB`), got {stack}')


+class PropagatorMeta(type):
+    '''This is a metaclass -- enforces method definition
+    on the class body
+
+    Every Propagator must implement the following methods
+    * propagate - builds automatic features from the input layer
+
+    References:
+        https://docs.python.org/3/reference/datamodel.html#metaclasses
+        https://realpython.com/python-metaclasses/
+    '''
+    def __new__(meta, name, base, body):
+        propagator_methods = ('propagate',)
+
+        for pm in propagator_methods:
+            if pm not in body:
+                msg = 'Propagator must implement {:}'.format(pm)
+                raise TypeError(msg)
+
+        return super().__new__(meta, name, base, body)

-class InterleavedPropagator(object, metaclass=PropagatorMeta):
+class BasePropagator(object):

def __init__(self, V, L, hidden_layers, rec_unit='BasicLSTMCell', scope_label=''):
'''Builds a recurrent neural network section of the graph
@@ -84,8 +93,7 @@ def __init__(self, V, L, hidden_layers, rec_unit='BasicLSTMCell', scope_label=''
http://www.aclweb.org/anthology/P15-1109
'''

-        scope_id = 'DB{:}{:}'.format(rec_unit, scope_label)
-        # self.scope_id = '{:}-{:}'.format(rec_unit, scope_label)

self.V = V
self.L = L
@@ -94,9 +102,55 @@ def __init__(self, V, L, hidden_layers, rec_unit='BasicLSTMCell', scope_label=''
self.scope_label = scope_label

self.hidden_layers = hidden_layers
-        with tf.variable_scope(scope_id):
+        with tf.variable_scope(self.scope_id):
self.propagate


+class InterleavedPropagator(BasePropagator, metaclass=PropagatorMeta):
+
+    def __init__(self, V, L, hidden_layers, rec_unit='BasicLSTM', scope_label=''):
+        # default changed to `BasicLSTM`, the name get_unit actually accepts
+        self.scope_id = f'DB{rec_unit}_{scope_label}'
+        super(InterleavedPropagator, self).__init__(
+            V, L, hidden_layers, rec_unit=rec_unit, scope_label=scope_label)

@lazy_property
def propagate(self):
        '''Forward propagates the inputs V through the interleaved bi-LSTM network
@@ -194,3 +248,41 @@ def propagate(self):

# return self.V
return tf.concat((h, h_1), axis=2)

+class BiPropagator(BasePropagator, metaclass=PropagatorMeta):
+
+    def __init__(self, V, L, hidden_layers, rec_unit='BasicLSTM', scope_label=''):
+        # default changed to `BasicLSTM`, the name get_unit actually accepts
+        self.scope_id = f'Bi{rec_unit}_{scope_label}'
+        super(BiPropagator, self).__init__(
+            V, L, hidden_layers, rec_unit=rec_unit, scope_label=scope_label)
+
+    @lazy_property
+    def propagate(self):
+        '''Forward propagates the inputs V through the stacked bi-LSTM network
+
+        The inputs V are fed to each hidden layer in turn (forward propagation),
+        producing scores to be consumed by the prediction layer
+
+        Decorators:
+            lazy_property
+
+        Returns:
+            score {tf.Tensor} -- a 3D float tensor with dimensions
+                * batch_size -- fixed sample size from examples
+                * max_time -- maximum time steps within the batch
+                * 2 * hidden_layers[-1] -- concatenated fw/bw states
+        '''
+        inputs = self.V
+        for i, h in enumerate(self.hidden_layers):
+            with tf.variable_scope(f'h{i}'):
+                fw = get_unit(h, rec_unit=self.rec_unit)
+                bw = get_unit(h, rec_unit=self.rec_unit)
+                outputs, states = tf.nn.bidirectional_dynamic_rnn(
+                    fw, bw, inputs,
+                    sequence_length=self.L,
+                    dtype=tf.float32,
+                    time_major=False,
+                )
+                inputs = tf.concat(outputs, axis=2)
+
+        return inputs
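A quick sketch of the new dispatch path, assuming TF 1.x and illustrative tensor shapes and sizes:

import tensorflow as tf
from models.propagators import get_propagator

V = tf.placeholder(tf.float32, shape=(None, None, 50), name='V')  # [batch, max_time, features]
L = tf.placeholder(tf.int32, shape=(None,), name='L')             # true length per example

propagator_cls = get_propagator('BI')   # BiPropagator; 'DB' yields InterleavedPropagator
propagator = propagator_cls(V, L, [32, 32], rec_unit='BasicLSTM')

# lazy_property builds the subgraph on first access; the result has shape
# [batch, max_time, 2 * 32], both directions of the last layer concatenated.
hidden = propagator.propagate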
