From ba59b4c27a041830af3061a7f1da05a3831f8070 Mon Sep 17 00:00:00 2001
From: Guilherme Varela <guilhermevarela@hotmail.com>
Date: Mon, 1 Oct 2018 21:49:45 -0300
Subject: [PATCH] issue #18: Recon subtask for first layer validations

---
 models/estimators.py                          |  2 -
 models/optimizers.py                          | 13 +--
 models/predictors.py                          | 85 +++----------------
 models/propagators.py                         | 31 ++++---
 .../lr_5.00e-03/2018-10-01 125533/params.json |  1 -
 .../lr_5.00e-03/2018-10-01 125605/params.json |  1 -
 .../lr_5.00e-03/2018-10-01 130123/params.json |  1 -
 .../lr_5.00e-03/2018-10-01 130243/params.json |  1 -
 .../lr_5.00e-03/2018-10-01 130615/params.json |  1 -
 .../lr_5.00e-03/2018-10-01 130744/params.json |  1 -
 10 files changed, 37 insertions(+), 100 deletions(-)
 delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125533/params.json
 delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125605/params.json
 delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130123/params.json
 delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130243/params.json
 delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130615/params.json
 delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130744/params.json

diff --git a/models/estimators.py b/models/estimators.py
index 8e3a96db..e38b8a72 100644
--- a/models/estimators.py
+++ b/models/estimators.py
@@ -398,8 +398,6 @@ def valid_eval(Y, prefix='valid'):
                     [deep_srl.cost, deep_srl.optimize, deep_srl.predict, deep_srl.error],
                     feed_dict={X: X_batch, T: T_batch, L: L_batch}
                 )
-                if (step) % 250 == 0:
-                    import code; code.interact(local=dict(globals(), **locals()))
 
                 total_loss += loss
                 total_error += error
diff --git a/models/optimizers.py b/models/optimizers.py
index 60dfd50f..bf975cf6 100644
--- a/models/optimizers.py
+++ b/models/optimizers.py
@@ -193,16 +193,19 @@ def __init__(self, X, T, L, r_depth=-1,
 
             # down branch --> Handles classification
             # [BATCH, MAX_TIME, 2*hidden_size[:1]] this tensor is zero padded from 3rd position
-            self.propagator_0 = InterleavedPropagator(X, L, hidden_size[:r_depth], ru=ru)
-
+            self.propagator_0 = InterleavedPropagator(X, L, hidden_size[:r_depth], ru=ru,  i=0)
             # merge the represenations
             # print(self.predictor_0.get_shape())
             # print(self.propagator_0.get_shape())
-            self.Xhat = tf.concat((self.propagator_0.propagate, tf.cast(self.predictor_0.predict, tf.float32)), axis=2)
+            self.Rflat = self.predictor_0.predict
+            self.Rhat = tf.one_hot(self.Rflat, 3, on_value=1, off_value=0)
+            # TODO(review): presumably keep only the non-zero features via the (disabled) mask below; confirm
+            # self.mask = tf.boolean_mask(self.Rhat, self.Rflat)
+            self.Xhat = tf.concat((self.propagator_0.propagate, tf.cast(self.Rhat, tf.float32)), axis=2)
 
             # joint propagator
-            self.propagator_1 = InterleavedPropagator(self.Xhat, L, hidden_size[r_depth:], ru=ru)
-            self.predictor_1 = CRFPredictor(self.propagator_1, self.C, L, i=1)
+            self.propagator_1 = InterleavedPropagator(self.Xhat, L, hidden_size[r_depth:], ru=ru, i=1)
+            self.predictor_1 = CRFPredictor(self.propagator_1.propagate, self.C, L, i=1)
 
         else:
             raise NotImplementedError('This combination of parameters is not implemented')
diff --git a/models/predictors.py b/models/predictors.py
index 0d49b0e5..3da7b9a2 100644
--- a/models/predictors.py
+++ b/models/predictors.py
@@ -18,17 +18,19 @@
 
 class Predictor(object):
 
-    def __init__(self, V, T, seqlens, i=0):
-        self.predictor = 'CRF'
+    def __init__(self, V, T, L, i=0):
+        scope_id = 'CRF-{:}'.format(i)
 
         self.V = V
         self.T = T
-        self.Tflat = tf.cast(tf.argmax(T, 2), tf.int32)
-        self.seqlens = seqlens
+        self.L = L
         self.i = i
-        self.score
-        self.cost
-        self.predict
+        self.Tflat = tf.cast(tf.argmax(T, 2), tf.int32)
+
+        with tf.variable_scope(scope_id):
+            self.score
+            self.cost
+            self.predict
 
     def score(self):
         raise NotImplementedError('Predictor must implement cost')
@@ -71,7 +73,7 @@ def cost(self):
         '''
         scope_id = 'cost{:}'.format(self.i)
         with tf.variable_scope(scope_id):
-            args = (self.S, self.Tflat, self.seqlens)
+            args = (self.S, self.Tflat, self.L)
             log_likelihood, self.transition_params = crf_log_likelihood(*args)
 
         return tf.reduce_mean(-log_likelihood)
@@ -91,7 +93,7 @@ def predict(self):
         scope_id = 'prediction{:}'.format(self.i)
         with tf.variable_scope(scope_id):
             # Compute the viterbi sequence and score.
-            args = (self.S, self.transition_params, self.seqlens)
+            args = (self.S, self.transition_params, self.L)
             viterbi_sequence, viterbi_score = crf_decode(*args)
 
         return tf.cast(viterbi_sequence, tf.int32)
@@ -107,67 +109,4 @@ def wo_shape(self):
     def bo_shape(self):
         # Static dimensions are of class Dimension(n)
         t = int(self.T.get_shape()[-1])
-        return (t,)
-
-# class CRFDualLabelPredictor(Predictor):
-#     '''Computes the viterbi_score for dual label tasks
-
-#     Previous works show that the argument recognition subtask is
-#     important. Have it being computed in parallel instead of
-#     having it computed as a pipeline
-
-#     Instead of having:
-#         B-A0, I-A0, B-V, B-A1, I-A1, I-A1
-
-#     Have:
-#         (B, A0), (I, A0), (B, V), (B, A1), (I, A1), (I, A1)
-
-#     Extends:
-#         Predictor
-#     '''
-#     def __init__(self, Scores, T, seqlens):
-#         self.predictor = 'CRF'
-
-#         self.Scores = Scores
-#         self.T = tf.cast(tf.argmax(T, 2), tf.int32)
-#         self.seqlens = seqlens
-
-#         self.cost
-#         self.predict
-
-#     @lazy_property
-#     def cost(self):
-#         '''Computes the viterbi_score after the propagation step, returns the cost.
-
-#         Consumes the representation coming from propagation layer, evaluates 
-#             the log_likelihod and parameters
-
-#         Decorators:
-#             lazy_property
-
-#         Returns:
-#             cost {tf.float64} -- A scalar holding the average log_likelihood 
-#             of the loss by estimatiof
-#         '''
-#         with tf.variable_scope('cost'):
-#             log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(self.Scores, self.T, self.seqlens)
-
-#         return tf.reduce_mean(-log_likelihood)
-
-#     @lazy_property
-#     def predict(self):
-#         '''Decodes the viterbi score for the inputs
-
-#         Consumes both results from propagation and and cost layers
-
-#         Decorators:
-#             lazy_property
-
-#         Returns:
-#             [type] -- [description]
-#         '''
-#         with tf.variable_scope('prediction'):
-#             # Compute the viterbi sequence and score.
-#             viterbi_sequence, viterbi_score = tf.contrib.crf.crf_decode(self.Scores, self.transition_params, self.seqlens)
-
-#         return tf.cast(viterbi_sequence, tf.int32)
\ No newline at end of file
+        return (t,)
\ No newline at end of file
diff --git a/models/propagators.py b/models/propagators.py
index bc13f250..e50bb589 100644
--- a/models/propagators.py
+++ b/models/propagators.py
@@ -30,15 +30,18 @@ def get_unit(sz, ru='BasicLSTM'):
 
 class Propagator(object):
 
-    def __init__(self, V, seqlens, hidden_sz_list, ru='BasicLSTMCell'):
-        self.ru = ru
-        self.propagator = 'interleaved'
+    def __init__(self, V, L, hidden_sz_list, ru='BasicLSTMCell', i=0):
+        scope_id = 'DB{:}{:}'.format(ru, i)
 
-        self.hidden_sz_list = hidden_sz_list
         self.V = V
-        self.seqlens = seqlens
+        self.L = L
 
-        self.propagate
+        self.ru = ru
+        self.i = i
+
+        self.hidden_sz_list = hidden_sz_list
+        with tf.variable_scope(scope_id):
+            self.propagate
 
     def propagate(self):
         raise NotImplementedError('Propagator must implement propagate')
@@ -68,7 +71,7 @@ def propagate(self):
             outputs_fw, states = tf.nn.dynamic_rnn(
                 cell=self.cell_fw,
                 inputs=self.V,
-                sequence_length=self.seqlens,
+                sequence_length=self.L,
                 dtype=tf.float32,
                 time_major=False
             )
@@ -78,7 +81,7 @@ def propagate(self):
 
             inputs_bw = tf.reverse_sequence(
                 outputs_fw,
-                self.seqlens,
+                self.L,
                 batch_axis=0,
                 seq_axis=1
             )
@@ -86,13 +89,13 @@ def propagate(self):
             outputs_bw, states = tf.nn.dynamic_rnn(
                 cell=self.cell_bw,
                 inputs=inputs_bw,
-                sequence_length=self.seqlens,
+                sequence_length=self.L,
                 dtype=tf.float32,
                 time_major=False
             )
             outputs_bw = tf.reverse_sequence(
                 outputs_bw,
-                self.seqlens,
+                self.L,
                 batch_axis=0,
                 seq_axis=1
             )
@@ -108,7 +111,7 @@ def propagate(self):
                 outputs_fw, states = tf.nn.dynamic_rnn(
                     cell=self.cell_fw,
                     inputs=inputs_fw,
-                    sequence_length=self.seqlens,
+                    sequence_length=self.L,
                     dtype=tf.float32,
                     time_major=False)
 
@@ -116,7 +119,7 @@ def propagate(self):
                 inputs_bw = tf.concat((outputs_fw, h), axis=2)
                 inputs_bw = tf.reverse_sequence(
                     inputs_bw,
-                    self.seqlens,
+                    self.L,
                     batch_axis=0,
                     seq_axis=1
                 )
@@ -125,13 +128,13 @@ def propagate(self):
                 outputs_bw, states = tf.nn.dynamic_rnn(
                     cell=self.cell_bw,
                     inputs=inputs_bw,
-                    sequence_length=self.seqlens,
+                    sequence_length=self.L,
                     dtype=tf.float32,
                     time_major=False)
 
                 outputs_bw = tf.reverse_sequence(
                     outputs_bw,
-                    self.seqlens,
+                    self.L,
                     batch_axis=0,
                     seq_axis=1
                 )
diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125533/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125533/params.json
deleted file mode 100644
index 64b5675c..00000000
--- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125533/params.json	
+++ /dev/null
@@ -1 +0,0 @@
-{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": "1", "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"}
\ No newline at end of file
diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125605/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125605/params.json
deleted file mode 100644
index 810c4dbd..00000000
--- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125605/params.json	
+++ /dev/null
@@ -1 +0,0 @@
-{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": 1, "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"}
\ No newline at end of file
diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130123/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130123/params.json
deleted file mode 100644
index 810c4dbd..00000000
--- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130123/params.json	
+++ /dev/null
@@ -1 +0,0 @@
-{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": 1, "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"}
\ No newline at end of file
diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130243/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130243/params.json
deleted file mode 100644
index 810c4dbd..00000000
--- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130243/params.json	
+++ /dev/null
@@ -1 +0,0 @@
-{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": 1, "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"}
\ No newline at end of file
diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130615/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130615/params.json
deleted file mode 100644
index 810c4dbd..00000000
--- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130615/params.json	
+++ /dev/null
@@ -1 +0,0 @@
-{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": 1, "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"}
\ No newline at end of file
diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130744/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130744/params.json
deleted file mode 100644
index 810c4dbd..00000000
--- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130744/params.json	
+++ /dev/null
@@ -1 +0,0 @@
-{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": 1, "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"}
\ No newline at end of file