p3 exploration hw5

Khodeir · Feb 8, 2019 · e741ae8 · e741ae8
1 parent aa8d9c8
commit e741ae8
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 46 deletions.
diff --git a/hw5/exp/density_model.py b/hw5/exp/density_model.py
@@ -153,9 +153,6 @@ def __init__(self, ob_dim, hid_dim, learning_rate, kl_weight):
 
     def build_computation_graph(self):
         """
-            ### PROBLEM 3
-            ### YOUR CODE HERE
-
             TODO:
                 1. self.log_likelihood. shape: (batch_size)
                     - use tf.squeeze
@@ -180,15 +177,13 @@ def build_computation_graph(self):
         self.encoder1, self.encoder2, self.prior, self.discriminator = self.forward_pass(self.state1, self.state2)
         self.discrim_target = tf.placeholder(shape=[None, 1], name="discrim_target", dtype=tf.float32)
 
-        raise NotImplementedError
-        self.log_likelihood = None
-        self.likelihood = None
-        self.kl = None
+        self.log_likelihood = tf.squeeze(self.discriminator.log_prob(self.discrim_target), axis=[1])
+        self.likelihood = tf.squeeze(self.discriminator.prob(self.discrim_target), axis=[1])
+        self.kl = self.encoder1.kl_divergence(self.prior) + self.encoder2.kl_divergence(self.prior)
         assert len(self.log_likelihood.shape) == len(self.likelihood.shape) == len(self.kl.shape) == 1
 
-        raise NotImplementedError
-        self.elbo = None
-        self.update_op = None
+        self.elbo = tf.reduce_mean(self.log_likelihood - self.kl * self.kl_weight, axis=0)
+        self.update_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(-self.elbo)
 
     def define_placeholders(self):
         state1 = tf.placeholder(shape=[None, self.ob_dim], name="s1", dtype=tf.float32)
@@ -197,9 +192,6 @@ def define_placeholders(self):
 
     def make_encoder(self, state, z_size, scope, n_layers, hid_size):
         """
-            ### PROBLEM 3
-            ### YOUR CODE HERE
-
             args:
                 state: tf variable
                 z_size: output dimension of the encoder network
@@ -216,31 +208,33 @@ def make_encoder(self, state, z_size, scope, n_layers, hid_size):
 
             Hint: use build_mlp
         """
-        # z_mean = raise NotImplementedError
-        # z_logstd = raise NotImplementedError
+        z_mean = build_mlp(
+            input_placeholder=state,
+            output_size=z_size,
+            scope=scope,
+            n_layers=n_layers,
+            size=hid_size,
+            activation=tf.tanh,
+            output_activation=None
+        )
+        z_logstd = tf.Variable(initial_value=np.zeros(shape=(int(z_size), ), dtype=np.float32), trainable=True)
         return tfp.distributions.MultivariateNormalDiag(loc=z_mean, scale_diag=tf.exp(z_logstd))
 
     def make_prior(self, z_size):
         """
-            ### PROBLEM 3
-            ### YOUR CODE HERE
-
             args:
                 z_size: output dimension of the encoder network
 
             TODO:
                 prior_mean and prior_logstd are for a standard normal distribution
                     both have dimension z_size
         """
-        # prior_mean = raise NotImplementedError
-        # prior_logstd = raise NotImplementedError
+        prior_mean = tf.zeros((int(z_size),))  # int(): forward_pass passes hid_dim/2, a float under Python 3
+        prior_logstd = tf.zeros((int(z_size),))  # standard normal: std = 1, so log(std) = 0 and exp() below gives scale 1
         return tfp.distributions.MultivariateNormalDiag(loc=prior_mean, scale_diag=tf.exp(prior_logstd))
 
     def make_discriminator(self, z, output_size, scope, n_layers, hid_size):
         """
-            ### PROBLEM 3
-            ### YOUR CODE HERE
-
             args:
                 z: input to to discriminator network
                 output_size: output dimension of discriminator network
@@ -255,14 +249,19 @@ def make_discriminator(self, z, output_size, scope, n_layers, hid_size):
 
             Hint: use build_mlp
         """
-        # logit = raise NotImplementedError
+        logit = build_mlp(
+            input_placeholder=z,
+            output_size=output_size,
+            scope=scope,
+            n_layers=n_layers,
+            size=hid_size,
+            activation=tf.tanh,
+            output_activation=None
+        )
         return tfp.distributions.Bernoulli(logit)
 
     def forward_pass(self, state1, state2):
         """
-            ### PROBLEM 3
-            ### YOUR CODE HERE
-
             args:
                 state1: tf variable
                 state2: tf variable
@@ -293,19 +292,16 @@ def forward_pass(self, state1, state2):
         prior = self.make_prior(self.hid_dim/2)
 
         # Sampled Latent
-        # z1 = raise NotImplementedError
-        # z2 = raise NotImplementedError
-        # z = raise NotImplementedError
+        z1 = encoder1.sample()
+        z2 = encoder2.sample()
+        z = tf.concat([z1, z2], axis=1)
 
         # Discriminator
         discriminator = make_discriminator(z, 1, 'discriminator', n_layers=2, hid_size=self.hid_dim)
         return encoder1, encoder2, prior, discriminator
 
     def update(self, state1, state2, target):
         """
-            ### PROBLEM 3
-            ### YOUR CODE HERE
-
             args:
                 state1: np array (batch_size, ob_dim)
                 state2: np array (batch_size, ob_dim)
@@ -320,14 +316,15 @@ def update(self, state1, state2, target):
         assert state1.ndim == state2.ndim == target.ndim
         assert state1.shape[1] == state2.shape[1] == self.ob_dim
         assert state1.shape[0] == state2.shape[0] == target.shape[0]
-        raise NotImplementedError
+        _, ll, kl, elbo = self.sess.run([self.update_op, self.log_likelihood, self.kl, self.elbo], feed_dict = {
+            self.state1: state1,
+            self.state2: state2,
+            self.discrim_target: target
+        })
         return ll, kl, elbo
 
     def get_likelihood(self, state1, state2):
         """
-            ### PROBLEM 3
-            ### YOUR CODE HERE
-
             args:
                 state1: np array (batch_size, ob_dim)
                 state2: np array (batch_size, ob_dim)
@@ -341,14 +338,15 @@ def get_likelihood(self, state1, state2):
         assert state1.ndim == state2.ndim
         assert state1.shape[1] == state2.shape[1] == self.ob_dim
         assert state1.shape[0] == state2.shape[0]
-        raise NotImplementedError
+        likelihood = self.sess.run(self.likelihood, feed_dict={
+            self.state1: state1,
+            self.state2: state2,
+            self.discrim_target: np.ones((state1.shape[0], 1))
+        })
         return likelihood
 
     def get_prob(self, state):
         """
-            ### PROBLEM 3
-            ### YOUR CODE HERE
-        
             args:
                 state: np array (batch_size, ob_dim)
 
@@ -359,8 +357,8 @@ def get_prob(self, state):
                     compute the probability density of x from the discriminator
                     likelihood (see homework doc)
         """
-        # likelihood = raise NotImplementedError
+        likelihood = self.get_likelihood(state, state)
         # avoid divide by 0 and log(0)
         likelihood = np.clip(np.squeeze(likelihood), 1e-5, 1-1e-5)
-        # prob = raise NotImplementedError
+        prob = (1 - likelihood) / likelihood
         return prob
diff --git a/hw5/exp/train_ac_exploration_f18.py b/hw5/exp/train_ac_exploration_f18.py
@@ -534,9 +534,7 @@ def train_AC(
         else:
             # 1. Fit density model
             if dm == 'ex2':
-                ### PROBLEM 3
-                ### YOUR CODE HERE
-                raise NotImplementedError
+                ll, kl, elbo = exploration.fit_density_model(ob_no)
             elif dm == 'hist' or dm == 'rbf':
                 exploration.fit_density_model(ob_no)
             else: