From ba59b4c27a041830af3061a7f1da05a3831f8070 Mon Sep 17 00:00:00 2001 From: Guilherme Varela Date: Mon, 1 Oct 2018 21:49:45 -0300 Subject: [PATCH] issue #18: Recon subtask for first layer validations --- models/estimators.py | 2 - models/optimizers.py | 13 +-- models/predictors.py | 85 +++---------------- models/propagators.py | 31 ++++--- .../lr_5.00e-03/2018-10-01 125533/params.json | 1 - .../lr_5.00e-03/2018-10-01 125605/params.json | 1 - .../lr_5.00e-03/2018-10-01 130123/params.json | 1 - .../lr_5.00e-03/2018-10-01 130243/params.json | 1 - .../lr_5.00e-03/2018-10-01 130615/params.json | 1 - .../lr_5.00e-03/2018-10-01 130744/params.json | 1 - 10 files changed, 37 insertions(+), 100 deletions(-) delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125533/params.json delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125605/params.json delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130123/params.json delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130243/params.json delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130615/params.json delete mode 100644 outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130744/params.json diff --git a/models/estimators.py b/models/estimators.py index 8e3a96db..e38b8a72 100644 --- a/models/estimators.py +++ b/models/estimators.py @@ -398,8 +398,6 @@ def valid_eval(Y, prefix='valid'): [deep_srl.cost, deep_srl.optimize, deep_srl.predict, deep_srl.error], feed_dict={X: X_batch, T: T_batch, L: L_batch} ) - if (step) % 250 == 0: - import code; code.interact(local=dict(globals(), **locals())) total_loss += loss total_error += error diff --git a/models/optimizers.py b/models/optimizers.py index 60dfd50f..bf975cf6 100644 --- a/models/optimizers.py +++ b/models/optimizers.py @@ -193,16 +193,19 @@ def __init__(self, X, T, L, r_depth=-1, # down branch --> Handles classification # [BATCH, MAX_TIME, 2*hidden_size[:1]] this tensor is zero padded from 3rd position - self.propagator_0 = InterleavedPropagator(X, L, hidden_size[:r_depth], ru=ru) - + self.propagator_0 = InterleavedPropagator(X, L, hidden_size[:r_depth], ru=ru, i=0) # merge the represenations # print(self.predictor_0.get_shape()) # print(self.propagator_0.get_shape()) - self.Xhat = tf.concat((self.propagator_0.propagate, tf.cast(self.predictor_0.predict, tf.float32)), axis=2) + self.Rflat = self.predictor_0.predict + self.Rhat = tf.one_hot(self.Rflat, 3, on_value=1, off_value=0) + # Non zero features over + # self.mask = tf.boolean_mask(self.Rhat, self.Rflat) + self.Xhat = tf.concat((self.propagator_0.propagate, tf.cast(self.Rhat, tf.float32)), axis=2) # joint propagator - self.propagator_1 = InterleavedPropagator(self.Xhat, L, hidden_size[r_depth:], ru=ru) - self.predictor_1 = CRFPredictor(self.propagator_1, self.C, L, i=1) + self.propagator_1 = InterleavedPropagator(self.Xhat, L, hidden_size[r_depth:], ru=ru, i=1) + self.predictor_1 = CRFPredictor(self.propagator_1.propagate, self.C, L, i=1) else: raise NotImplementedError('This combination of parameters is not implemented') diff --git a/models/predictors.py b/models/predictors.py index 0d49b0e5..3da7b9a2 100644 --- a/models/predictors.py +++ b/models/predictors.py @@ -18,17 +18,19 @@ class Predictor(object): - def __init__(self, V, T, seqlens, i=0): - self.predictor = 'CRF' + def __init__(self, V, T, L, i=0): + scope_id = 'CRF-{:}'.format(i) self.V = V self.T = T - self.Tflat = tf.cast(tf.argmax(T, 2), tf.int32) - self.seqlens = seqlens + self.L = L self.i = i - self.score - self.cost - self.predict + self.Tflat = tf.cast(tf.argmax(T, 2), tf.int32) + + with tf.variable_scope(scope_id): + self.score + self.cost + self.predict def score(self): raise NotImplementedError('Predictor must implement cost') @@ -71,7 +73,7 @@ def cost(self): ''' scope_id = 'cost{:}'.format(self.i) with tf.variable_scope(scope_id): - args = (self.S, self.Tflat, self.seqlens) + args = (self.S, self.Tflat, self.L) log_likelihood, self.transition_params = crf_log_likelihood(*args) return tf.reduce_mean(-log_likelihood) @@ -91,7 +93,7 @@ def predict(self): scope_id = 'prediction{:}'.format(self.i) with tf.variable_scope(scope_id): # Compute the viterbi sequence and score. - args = (self.S, self.transition_params, self.seqlens) + args = (self.S, self.transition_params, self.L) viterbi_sequence, viterbi_score = crf_decode(*args) return tf.cast(viterbi_sequence, tf.int32) @@ -107,67 +109,4 @@ def wo_shape(self): def bo_shape(self): # Static dimensions are of class Dimension(n) t = int(self.T.get_shape()[-1]) - return (t,) - -# class CRFDualLabelPredictor(Predictor): -# '''Computes the viterbi_score for dual label tasks - -# Previous works show that the argument recognition subtask is -# important. Have it being computed in parallel instead of -# having it computed as a pipeline - -# Instead of having: -# B-A0, I-A0, B-V, B-A1, I-A1, I-A1 - -# Have: -# (B, A0), (I, A0), (B, V), (B, A1), (I, A1), (I, A1) - -# Extends: -# Predictor -# ''' -# def __init__(self, Scores, T, seqlens): -# self.predictor = 'CRF' - -# self.Scores = Scores -# self.T = tf.cast(tf.argmax(T, 2), tf.int32) -# self.seqlens = seqlens - -# self.cost -# self.predict - -# @lazy_property -# def cost(self): -# '''Computes the viterbi_score after the propagation step, returns the cost. - -# Consumes the representation coming from propagation layer, evaluates -# the log_likelihod and parameters - -# Decorators: -# lazy_property - -# Returns: -# cost {tf.float64} -- A scalar holding the average log_likelihood -# of the loss by estimatiof -# ''' -# with tf.variable_scope('cost'): -# log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(self.Scores, self.T, self.seqlens) - -# return tf.reduce_mean(-log_likelihood) - -# @lazy_property -# def predict(self): -# '''Decodes the viterbi score for the inputs - -# Consumes both results from propagation and and cost layers - -# Decorators: -# lazy_property - -# Returns: -# [type] -- [description] -# ''' -# with tf.variable_scope('prediction'): -# # Compute the viterbi sequence and score. -# viterbi_sequence, viterbi_score = tf.contrib.crf.crf_decode(self.Scores, self.transition_params, self.seqlens) - -# return tf.cast(viterbi_sequence, tf.int32) \ No newline at end of file + return (t,) \ No newline at end of file diff --git a/models/propagators.py b/models/propagators.py index bc13f250..e50bb589 100644 --- a/models/propagators.py +++ b/models/propagators.py @@ -30,15 +30,18 @@ def get_unit(sz, ru='BasicLSTM'): class Propagator(object): - def __init__(self, V, seqlens, hidden_sz_list, ru='BasicLSTMCell'): - self.ru = ru - self.propagator = 'interleaved' + def __init__(self, V, L, hidden_sz_list, ru='BasicLSTMCell', i=0): + scope_id = 'DB{:}{:}'.format(ru, i) - self.hidden_sz_list = hidden_sz_list self.V = V - self.seqlens = seqlens + self.L = L - self.propagate + self.ru = ru + self.i = i + + self.hidden_sz_list = hidden_sz_list + with tf.variable_scope(scope_id): + self.propagate def propagate(self): raise NotImplementedError('Propagator must implement propagate') @@ -68,7 +71,7 @@ def propagate(self): outputs_fw, states = tf.nn.dynamic_rnn( cell=self.cell_fw, inputs=self.V, - sequence_length=self.seqlens, + sequence_length=self.L, dtype=tf.float32, time_major=False ) @@ -78,7 +81,7 @@ def propagate(self): inputs_bw = tf.reverse_sequence( outputs_fw, - self.seqlens, + self.L, batch_axis=0, seq_axis=1 ) @@ -86,13 +89,13 @@ def propagate(self): outputs_bw, states = tf.nn.dynamic_rnn( cell=self.cell_bw, inputs=inputs_bw, - sequence_length=self.seqlens, + sequence_length=self.L, dtype=tf.float32, time_major=False ) outputs_bw = tf.reverse_sequence( outputs_bw, - self.seqlens, + self.L, batch_axis=0, seq_axis=1 ) @@ -108,7 +111,7 @@ def propagate(self): outputs_fw, states = tf.nn.dynamic_rnn( cell=self.cell_fw, inputs=inputs_fw, - sequence_length=self.seqlens, + sequence_length=self.L, dtype=tf.float32, time_major=False) @@ -116,7 +119,7 @@ def propagate(self): inputs_bw = tf.concat((outputs_fw, h), axis=2) inputs_bw = tf.reverse_sequence( inputs_bw, - self.seqlens, + self.L, batch_axis=0, seq_axis=1 ) @@ -125,13 +128,13 @@ def propagate(self): outputs_bw, states = tf.nn.dynamic_rnn( cell=self.cell_bw, inputs=inputs_bw, - sequence_length=self.seqlens, + sequence_length=self.L, dtype=tf.float32, time_major=False) outputs_bw = tf.reverse_sequence( outputs_bw, - self.seqlens, + self.L, batch_axis=0, seq_axis=1 ) diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125533/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125533/params.json deleted file mode 100644 index 64b5675c..00000000 --- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125533/params.json +++ /dev/null @@ -1 +0,0 @@ -{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": "1", "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"} \ No newline at end of file diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125605/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125605/params.json deleted file mode 100644 index 810c4dbd..00000000 --- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 125605/params.json +++ /dev/null @@ -1 +0,0 @@ -{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": 1, "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"} \ No newline at end of file diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130123/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130123/params.json deleted file mode 100644 index 810c4dbd..00000000 --- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130123/params.json +++ /dev/null @@ -1 +0,0 @@ -{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": 1, "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"} \ No newline at end of file diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130243/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130243/params.json deleted file mode 100644 index 810c4dbd..00000000 --- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130243/params.json +++ /dev/null @@ -1 +0,0 @@ -{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": 1, "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"} \ No newline at end of file diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130615/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130615/params.json deleted file mode 100644 index 810c4dbd..00000000 --- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130615/params.json +++ /dev/null @@ -1 +0,0 @@ -{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": 1, "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"} \ No newline at end of file diff --git a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130744/params.json b/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130744/params.json deleted file mode 100644 index 810c4dbd..00000000 --- a/outputs/1.0/wan50/hs_16x16/ctxp_1/R_IOB/batch/lr_5.00e-03/2018-10-01 130744/params.json +++ /dev/null @@ -1 +0,0 @@ -{"batch_size": 250, "chunks": false, "embeddings_model": "wan50", "embeddings_trainable": false, "epochs": 1000, "hidden_layers": [16, 16], "input_labels": ["ID", "FORM", "MARKER", "GPOS", "FORM_CTX_P-1", "FORM_CTX_P+0", "FORM_CTX_P+1"], "lr": 0.005, "r_depth": 1, "ru": "BasicLSTM", "target_labels": ["R", "IOB"], "version": "1.0"} \ No newline at end of file