issue #26: EMB lookup now runs OK

guilhermevarela · Oct 30, 2018 · 8a08332 · 8a08332
1 parent 58e7eeb
commit 8a08332
Show file tree

Hide file tree

Showing 5 changed files with 31 additions and 26 deletions.
diff --git a/config/pt/wan50.yaml b/config/pt/wan50.yaml
@@ -5,14 +5,14 @@ CHUNK_ID: {category: feature, dims: null, size: 1, type: int}
 CHUNK_LEN: {category: feature, dims: null, size: 1, type: int}
 CHUNK_START: {category: feature, dims: null, size: 1, type: int}
 CTREE: {category: feature, dims: 310, size: 310, type: choice}
-FORM: {category: feature, dims: 13207, size: 50, type: text}
-FORM_CTX_P+0: {category: feature, dims: 13207, size: 50, type: text}
-FORM_CTX_P+1: {category: feature, dims: 13207, size: 50, type: text}
-FORM_CTX_P+2: {category: feature, dims: 13207, size: 50, type: text}
-FORM_CTX_P+3: {category: feature, dims: 13207, size: 50, type: text}
-FORM_CTX_P-1: {category: feature, dims: 13207, size: 50, type: text}
-FORM_CTX_P-2: {category: feature, dims: 13207, size: 50, type: text}
-FORM_CTX_P-3: {category: feature, dims: 13207, size: 50, type: text}
+FORM: {category: feature, dims: 13207, size: 1, type: text}
+FORM_CTX_P+0: {category: feature, dims: 13207, size: 1, type: text}
+FORM_CTX_P+1: {category: feature, dims: 13207, size: 1, type: text}
+FORM_CTX_P+2: {category: feature, dims: 13207, size: 1, type: text}
+FORM_CTX_P+3: {category: feature, dims: 13207, size: 1, type: text}
+FORM_CTX_P-1: {category: feature, dims: 13207, size: 1, type: text}
+FORM_CTX_P-2: {category: feature, dims: 13207, size: 1, type: text}
+FORM_CTX_P-3: {category: feature, dims: 13207, size: 1, type: text}
 GPOS: {category: feature, dims: 26, size: 26, type: choice}
 GPOS_CTX_P+0: {category: feature, dims: 26, size: 26, type: choice}
 GPOS_CTX_P+1: {category: feature, dims: 26, size: 26, type: choice}
@@ -24,18 +24,18 @@ GPOS_CTX_P-3: {category: feature, dims: 26, size: 26, type: choice}
 ID: {category: feature, dims: null, size: 1, type: int}
 INDEX: {category: meta, dims: null, size: 1, type: int}
 IOB: {category: target, dims: 37, size: 37, type: choice}
-LEMMA: {category: feature, dims: 9006, size: 50, type: text}
-LEMMA_CTX_P+0: {category: feature, dims: 9006, size: 50, type: text}
-LEMMA_CTX_P+1: {category: feature, dims: 9006, size: 50, type: text}
-LEMMA_CTX_P+2: {category: feature, dims: 9006, size: 50, type: text}
-LEMMA_CTX_P+3: {category: feature, dims: 9006, size: 50, type: text}
-LEMMA_CTX_P-1: {category: feature, dims: 9006, size: 50, type: text}
-LEMMA_CTX_P-2: {category: feature, dims: 9006, size: 50, type: text}
-LEMMA_CTX_P-3: {category: feature, dims: 9006, size: 50, type: text}
+LEMMA: {category: feature, dims: 9006, size: 1, type: text}
+LEMMA_CTX_P+0: {category: feature, dims: 9006, size: 1, type: text}
+LEMMA_CTX_P+1: {category: feature, dims: 9006, size: 1, type: text}
+LEMMA_CTX_P+2: {category: feature, dims: 9006, size: 1, type: text}
+LEMMA_CTX_P+3: {category: feature, dims: 9006, size: 1, type: text}
+LEMMA_CTX_P-1: {category: feature, dims: 9006, size: 1, type: text}
+LEMMA_CTX_P-2: {category: feature, dims: 9006, size: 1, type: text}
+LEMMA_CTX_P-3: {category: feature, dims: 9006, size: 1, type: text}
 MARKER: {category: feature, dims: null, size: 1, type: int}
 MORF: {category: feature, dims: 113, size: 113, type: choice}
 P: {category: meta, dims: null, size: 1, type: int}
-PRED: {category: feature, dims: 1025, size: 50, type: text}
+PRED: {category: feature, dims: 1025, size: 1, type: text}
 P_S: {category: meta, dims: null, size: 1, type: int}
 R: {category: target, dims: 3, size: 3, type: choice}
 S: {category: meta, dims: null, size: 1, type: int}

diff --git a/datasets/scripts/tfrecords2.py b/datasets/scripts/tfrecords2.py
@@ -581,20 +581,24 @@ def _protobuf_with_embeddings_process(
             ind = tf.one_hot(ind, config_dict[key]['dims'], dtype=tf.float32)
             ind = tf.squeeze(ind, axis=1)
         else:
-            ind = tf.cast(sequence_features[key], tf.float32)
+            if key in ('INDEX',):
+                ind = tf.squeeze(ind, axis=1)
+            else:
+                ind = tf.cast(sequence_features[key], tf.float32)
+
 
         if key in input_labels:
             sequence_inputs.append(ind)
 
         elif key in output_labels:
             sequence_outputs.append(ind)
 
-        elif key in ['INDEX']:
-            sequence_descriptors.append(ind)
+        elif key in ('INDEX',):
+            sequence_descriptors = ind
 
     X = tf.concat(sequence_inputs, 1)
     T = tf.concat(sequence_outputs, 1)
-    D = tf.concat(sequence_descriptors, 1)
+    D = sequence_descriptors
 
     return X, T, L, D
 

diff --git a/models/propbank_encoder.py b/models/propbank_encoder.py
@@ -241,6 +241,7 @@ def decode_npyarray(self, Y, I, seq_list, target_labels,
                   for j, item in enumerate(sublist) if j < seq_list[i]]
 
         zip_list = sorted(zip(index, values), key=lambda x: x[0])
+
         target_dict = OrderedDict(zip_list)
 
 

diff --git a/preprocess.sh b/preprocess.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
-python preprocess.py glove_s50 --version 1.0
+# python preprocess.py glove_s50 --version 1.0
 python preprocess.py wang2vec_s50 --version 1.0
 python preprocess.py wang2vec_s100 --version 1.0
 python preprocess.py wang2vec_s300 --version 1.0

diff --git a/srl.py b/srl.py
@@ -169,7 +169,7 @@
             agent.fit()
 
         print(f'Best validation F1 -- {agent.best_validation_rate}')
-        test_f1 = agent.evaluate_testset()
-        print(f'Best test F1 -- {test_f1}')
-        valid_f1 = agent.evaluate_validset()
-        print(f'Best validation F1 -- {valid_f1}')
+        # test_f1 = agent.evaluate_testset()
+        # print(f'Best test F1 -- {test_f1}')
+        # valid_f1 = agent.evaluate_validset()
+        # print(f'Best validation F1 -- {valid_f1}')