Skip to content

Commit

Permalink
p3 exploration hw5
Browse files Browse the repository at this point in the history
  • Loading branch information
Khodeir-hubdoc committed Feb 8, 2019
1 parent aa8d9c8 commit e741ae8
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 46 deletions.
84 changes: 41 additions & 43 deletions hw5/exp/density_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,6 @@ def __init__(self, ob_dim, hid_dim, learning_rate, kl_weight):

def build_computation_graph(self):
"""
### PROBLEM 3
### YOUR CODE HERE
TODO:
1. self.log_likelihood. shape: (batch_size)
- use tf.squeeze
Expand All @@ -180,15 +177,13 @@ def build_computation_graph(self):
self.encoder1, self.encoder2, self.prior, self.discriminator = self.forward_pass(self.state1, self.state2)
self.discrim_target = tf.placeholder(shape=[None, 1], name="discrim_target", dtype=tf.float32)

raise NotImplementedError
self.log_likelihood = None
self.likelihood = None
self.kl = None
self.log_likelihood = tf.squeeze(self.discriminator.log_prob(self.discrim_target), axis=[1])
self.likelihood = tf.squeeze(self.discriminator.prob(self.discrim_target), axis=[1])
self.kl = self.encoder1.kl_divergence(self.prior) + self.encoder2.kl_divergence(self.prior)
assert len(self.log_likelihood.shape) == len(self.likelihood.shape) == len(self.kl.shape) == 1

raise NotImplementedError
self.elbo = None
self.update_op = None
self.elbo = tf.reduce_mean(self.log_likelihood - self.kl * self.kl_weight, axis=0)
self.update_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(-self.elbo)

def define_placeholders(self):
state1 = tf.placeholder(shape=[None, self.ob_dim], name="s1", dtype=tf.float32)
Expand All @@ -197,9 +192,6 @@ def define_placeholders(self):

def make_encoder(self, state, z_size, scope, n_layers, hid_size):
"""
### PROBLEM 3
### YOUR CODE HERE
args:
state: tf variable
z_size: output dimension of the encoder network
Expand All @@ -216,31 +208,33 @@ def make_encoder(self, state, z_size, scope, n_layers, hid_size):
Hint: use build_mlp
"""
# z_mean = raise NotImplementedError
# z_logstd = raise NotImplementedError
z_mean = build_mlp(
input_placeholder=state,
output_size=z_size,
scope=scope,
n_layers=n_layers,
size=hid_size,
activation=tf.tanh,
output_activation=None
)
z_logstd = tf.Variable(initial_value=np.zeros(shape=(int(z_size), ), dtype=np.float32), trainable=True)
return tfp.distributions.MultivariateNormalDiag(loc=z_mean, scale_diag=tf.exp(z_logstd))

def make_prior(self, z_size):
    """
    Build the prior over the latent code: a standard normal N(0, I).

    args:
        z_size: dimension of the latent code (same as the encoder output
            size). Cast to int before use, because the caller may pass a
            float such as ``hid_dim / 2``.

    returns:
        a tfp.distributions.MultivariateNormalDiag with zero mean and unit
        diagonal scale, with event size z_size
    """
    z_size = int(z_size)
    prior_mean = tf.zeros((z_size,))
    # A standard normal has std == 1, i.e. log-std == 0. Using ones here
    # would make the scale exp(1) = e, which is not the N(0, I) prior the
    # KL term is meant to be taken against.
    prior_logstd = tf.zeros((z_size,))
    return tfp.distributions.MultivariateNormalDiag(loc=prior_mean, scale_diag=tf.exp(prior_logstd))

def make_discriminator(self, z, output_size, scope, n_layers, hid_size):
"""
### PROBLEM 3
### YOUR CODE HERE
args:
z: input to the discriminator network
output_size: output dimension of discriminator network
Expand All @@ -255,14 +249,19 @@ def make_discriminator(self, z, output_size, scope, n_layers, hid_size):
Hint: use build_mlp
"""
# logit = raise NotImplementedError
logit = build_mlp(
input_placeholder=z,
output_size=output_size,
scope=scope,
n_layers=n_layers,
size=hid_size,
activation=tf.tanh,
output_activation=None
)
return tfp.distributions.Bernoulli(logit)

def forward_pass(self, state1, state2):
"""
### PROBLEM 3
### YOUR CODE HERE
args:
state1: tf variable
state2: tf variable
Expand Down Expand Up @@ -293,19 +292,16 @@ def forward_pass(self, state1, state2):
prior = self.make_prior(self.hid_dim/2)

# Sampled Latent
# z1 = raise NotImplementedError
# z2 = raise NotImplementedError
# z = raise NotImplementedError
z1 = encoder1.sample()
z2 = encoder2.sample()
z = tf.concat([z1, z2], axis=1)

# Discriminator
discriminator = make_discriminator(z, 1, 'discriminator', n_layers=2, hid_size=self.hid_dim)
return encoder1, encoder2, prior, discriminator

def update(self, state1, state2, target):
"""
### PROBLEM 3
### YOUR CODE HERE
args:
state1: np array (batch_size, ob_dim)
state2: np array (batch_size, ob_dim)
Expand All @@ -320,14 +316,15 @@ def update(self, state1, state2, target):
assert state1.ndim == state2.ndim == target.ndim
assert state1.shape[1] == state2.shape[1] == self.ob_dim
assert state1.shape[0] == state2.shape[0] == target.shape[0]
raise NotImplementedError
_, ll, kl, elbo = self.sess.run([self.update_op, self.log_likelihood, self.kl, self.elbo], feed_dict = {
self.state1: state1,
self.state2: state2,
self.discrim_target: target
})
return ll, kl, elbo

def get_likelihood(self, state1, state2):
"""
### PROBLEM 3
### YOUR CODE HERE
args:
state1: np array (batch_size, ob_dim)
state2: np array (batch_size, ob_dim)
Expand All @@ -341,14 +338,15 @@ def get_likelihood(self, state1, state2):
assert state1.ndim == state2.ndim
assert state1.shape[1] == state2.shape[1] == self.ob_dim
assert state1.shape[0] == state2.shape[0]
raise NotImplementedError
likelihood = self.sess.run(self.likelihood, feed_dict={
self.state1: state1,
self.state2: state2,
self.discrim_target: np.ones((state1.shape[0], 1))
})
return likelihood

def get_prob(self, state):
"""
### PROBLEM 3
### YOUR CODE HERE
args:
state: np array (batch_size, ob_dim)
Expand All @@ -359,8 +357,8 @@ def get_prob(self, state):
compute the probability density of x from the discriminator
likelihood (see homework doc)
"""
# likelihood = raise NotImplementedError
likelihood = self.get_likelihood(state, state)
# avoid divide by 0 and log(0)
likelihood = np.clip(np.squeeze(likelihood), 1e-5, 1-1e-5)
# prob = raise NotImplementedError
prob = (1 - likelihood) / likelihood
return prob
4 changes: 1 addition & 3 deletions hw5/exp/train_ac_exploration_f18.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,9 +534,7 @@ def train_AC(
else:
# 1. Fit density model
if dm == 'ex2':
### PROBLEM 3
### YOUR CODE HERE
raise NotImplementedError
ll, kl, elbo = exploration.fit_density_model(ob_no)
elif dm == 'hist' or dm == 'rbf':
exploration.fit_density_model(ob_no)
else:
Expand Down

0 comments on commit e741ae8

Please sign in to comment.