diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..32fd2240 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +*.DS_Store +__pycache__/ +env/ +*.pyc +*.pkl +*.p +mujoco/mjpro131 +*.h5 diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..9fcab3c0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,43 @@ +deep-neuroevolution +Copyright (c) 2018 Uber Technologies, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +deep-neuroevolution includes: + +evolution-strategies-starter +Copyright (c) 2016 OpenAI (http://openai.com) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 00000000..86afec4f --- /dev/null +++ b/README.md @@ -0,0 +1,78 @@ +## AI Labs Neuroevolution Algorithms + +This repo contains distributed implementations of the algorithms described in: + +[1] [Deep Neuroevolution: Genetic Algorithms Are a Competitive Alternative for Training Deep Neural Networks for Reinforcement Learning](https://arxiv.org/abs/1712.06567) + +[2] [Improving Exploration in Evolution Strategies for Deep Reinforcement Learning via a Population of Novelty-Seeking Agents](https://arxiv.org/abs/1712.06560) + +Our code is based on code from OpenAI, whom we thank. The original code and related paper from OpenAI can be found [here](https://github.com/openai/evolution-strategies-starter). 
The repo has been modified to run both ES and our algorithms, including our Deep Genetic Algorithm (DeepGA) locally and on AWS. + +Note: The Humanoid experiment depends on [Mujoco](http://www.mujoco.org/). Please provide your own Mujoco license and binary + +The blog post describing these papers can be found [here](https://eng.uber.com/deep-neuroevolution/) + +## How to run locally + +clone repo + +``` +git clone https://github.com/uber-common/deep-neuroevolution.git +``` + +create python3 virtual env + +``` +python3 -m venv env +. env/bin/activate +``` + +install requirements +``` +pip install -r requirements.txt +``` +If you plan to use the mujoco env, make sure to follow [mujoco-py](https://github.com/openai/mujoco-py)'s readme about how to install mujoco correctly + +launch redis +``` +. scripts/local_run_redis.sh +``` + +launch sample ES experiment +``` +. scripts/local_run_exp.sh es configurations/frostbite_es.json # For the Atari game Frostbite +. scripts/local_run_exp.sh es configurations/humanoid.json # For the MuJoCo Humanoid-v1 environment +``` + +launch sample NS-ES experiment +``` +. scripts/local_run_exp.sh ns-es configurations/frostbite_nses.json +. scripts/local_run_exp.sh ns-es configurations/humanoid_nses.json +``` + +launch sample NSR-ES experiment +``` +. scripts/local_run_exp.sh nsr-es configurations/frostbite_nsres.json +. scripts/local_run_exp.sh nsr-es configurations/humanoid_nsres.json +``` + +launch sample GA experiment +``` +. scripts/local_run_exp.sh ga configurations/frostbite_ga.json # For the Atari game Frostbite +``` + +launch sample Random Search experiment +``` +. 
scripts/local_run_exp.sh rs configurations/frostbite_ga.json # For the Atari game Frostbite +``` + + +visualize results by running a policy file +``` +python -m scripts.viz 'FrostbiteNoFrameskip-v4' +python -m scripts.viz 'Humanoid-v1' +``` + +### extra folder +The extra folder holds the XML specification file for the Humanoid +Locomotion with Deceptive Trap domain used in https://arxiv.org/abs/1712.06560. Use this XML file in gym to recreate the environment. diff --git a/configurations/frostbite_es.json b/configurations/frostbite_es.json new file mode 100644 index 00000000..3de725c5 --- /dev/null +++ b/configurations/frostbite_es.json @@ -0,0 +1,24 @@ +{ + "config": { + "calc_obstat_prob": 0.0, + "episodes_per_batch": 5000, + "eval_prob": 0.01, + "l2coeff": 0.005, + "noise_stdev": 0.005, + "snapshot_freq": 20, + "timesteps_per_batch": 10000, + "return_proc_mode": "centered_rank", + "episode_cutoff_mode": 5000 + }, + "env_id": "FrostbiteNoFrameskip-v4", + "optimizer": { + "args": { + "stepsize": 0.01 + }, + "type": "adam" + }, + "policy": { + "args" : {}, + "type": "ESAtariPolicy" + } +} diff --git a/configurations/frostbite_ga.json b/configurations/frostbite_ga.json new file mode 100644 index 00000000..8ad2a51a --- /dev/null +++ b/configurations/frostbite_ga.json @@ -0,0 +1,22 @@ +{ + "config": { + "calc_obstat_prob": 0.0, + "episodes_per_batch": 5000, + "eval_prob": 0.01, + "l2coeff": 0.005, + "noise_stdev": 0.005, + "snapshot_freq": 20, + "timesteps_per_batch": 10000, + "return_proc_mode": "centered_rank", + "episode_cutoff_mode": 5000 + }, + "population_size": 10, + "num_elites": 1, + "env_id": "FrostbiteNoFrameskip-v4", + "policy": { + "args" : { + "nonlin_type": "relu" + }, + "type": "GAAtariPolicy" + } +} diff --git a/configurations/frostbite_nses.json b/configurations/frostbite_nses.json new file mode 100644 index 00000000..93e60e23 --- /dev/null +++ b/configurations/frostbite_nses.json @@ -0,0 +1,31 @@ +{ + "config": { + "calc_obstat_prob": 0.0, + 
"episodes_per_batch": 100, + "eval_prob": 0.03, + "l2coeff": 0.005, + "noise_stdev": 0.02, + "snapshot_freq": 10, + "timesteps_per_batch": 1000, + "return_proc_mode": "centered_sign_rank", + "episode_cutoff_mode": 5000 + }, + "env_id": "FrostbiteNoFrameskip-v4", + "algo_type": "ns", + "novelty_search": { + "k": 10, + "population_size": 3, + "num_rollouts": 1, + "selection_method": "novelty_prob" + }, + "optimizer": { + "args": { + "stepsize": 0.01 + }, + "type": "adam" + }, + "policy": { + "args": {}, + "type": "ESAtariPolicy" + } +} diff --git a/configurations/frostbite_nsres.json b/configurations/frostbite_nsres.json new file mode 100644 index 00000000..c8959fdc --- /dev/null +++ b/configurations/frostbite_nsres.json @@ -0,0 +1,31 @@ +{ + "config": { + "calc_obstat_prob": 0.0, + "episodes_per_batch": 1000, + "eval_prob": 0.1, + "l2coeff": 0.005, + "noise_stdev": 0.02, + "snapshot_freq": 10, + "timesteps_per_batch": 10000, + "return_proc_mode": "centered_sign_rank", + "episode_cutoff_mode": 5000 + }, + "env_id": "FrostbiteNoFrameskip-v4", + "algo_type": "nsr", + "novelty_search": { + "k": 10, + "population_size": 3, + "num_rollouts": 1, + "selection_method": "novelty_prob" + }, + "optimizer": { + "args": { + "stepsize": 0.01 + }, + "type": "adam" + }, + "policy": { + "args": {}, + "type": "ESAtariPolicy" + } +} diff --git a/configurations/humanoid.json b/configurations/humanoid.json new file mode 100644 index 00000000..59960572 --- /dev/null +++ b/configurations/humanoid.json @@ -0,0 +1,34 @@ +{ + "config": { + "calc_obstat_prob": 0.01, + "episodes_per_batch": 1000, + "eval_prob": 0.03, + "l2coeff": 0.005, + "noise_stdev": 0.02, + "snapshot_freq": 10, + "timesteps_per_batch": 100000, + "return_proc_mode": "centered_rank", + "episode_cutoff_mode": "env_default" + }, + "env_id": "Humanoid-v1", + "exp_prefix": "humanoid", + "optimizer": { + "args": { + "stepsize": 0.01 + }, + "type": "adam" + }, + "policy": { + "args": { + "ac_bins": "continuous:", + "ac_noise_std": 
0.01, + "connection_type": "ff", + "hidden_dims": [ + 256, + 256 + ], + "nonlin_type": "tanh" + }, + "type": "MujocoPolicy" + } +} diff --git a/configurations/humanoid_nses.json b/configurations/humanoid_nses.json new file mode 100644 index 00000000..566c8fc6 --- /dev/null +++ b/configurations/humanoid_nses.json @@ -0,0 +1,41 @@ +{ + "config": { + "calc_obstat_prob": 0.01, + "episodes_per_batch": 1000, + "eval_prob": 0.03, + "l2coeff": 0.005, + "noise_stdev": 0.02, + "snapshot_freq": 10, + "timesteps_per_batch": 100000, + "return_proc_mode": "centered_sign_rank", + "episode_cutoff_mode": "env_default" + }, + "env_id": "Humanoid-v1", + "algo_type": "ns", + "exp_prefix": "humanoid", + "novelty_search": { + "k": 10, + "population_size": 5, + "num_rollouts": 5, + "selection_method": "novelty_prob" + }, + "optimizer": { + "args": { + "stepsize": 0.01 + }, + "type": "adam" + }, + "policy": { + "args": { + "ac_bins": "continuous:", + "ac_noise_std": 0.01, + "connection_type": "ff", + "hidden_dims": [ + 256, + 256 + ], + "nonlin_type": "tanh" + }, + "type": "MujocoPolicy" + } +} diff --git a/configurations/humanoid_nsres.json b/configurations/humanoid_nsres.json new file mode 100644 index 00000000..711a7f57 --- /dev/null +++ b/configurations/humanoid_nsres.json @@ -0,0 +1,41 @@ +{ + "config": { + "calc_obstat_prob": 0.01, + "episodes_per_batch": 1000, + "eval_prob": 0.03, + "l2coeff": 0.005, + "noise_stdev": 0.02, + "snapshot_freq": 10, + "timesteps_per_batch": 100000, + "return_proc_mode": "centered_sign_rank", + "episode_cutoff_mode": "env_default" + }, + "env_id": "Humanoid-v1", + "algo_type": "nsr", + "exp_prefix": "humanoid", + "novelty_search": { + "k": 10, + "population_size": 5, + "num_rollouts": 5, + "selection_method": "novelty_prob" + }, + "optimizer": { + "args": { + "stepsize": 0.01 + }, + "type": "adam" + }, + "policy": { + "args": { + "ac_bins": "continuous:", + "ac_noise_std": 0.01, + "connection_type": "ff", + "hidden_dims": [ + 256, + 256 + ], + 
class FireResetEnv(gym.Wrapper):
    """Wrapper that presses FIRE (and then action 2) right after every reset.

    Intended for games that stay frozen until the player fires.
    """

    def __init__(self, env):
        """Take action on reset for environments that are fixed until firing.

        Asserts that action 1 of the wrapped env is 'FIRE' and that at least
        three actions exist, because `_reset` issues actions 1 and 2.
        """
        gym.Wrapper.__init__(self, env)
        assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
        assert len(env.unwrapped.get_action_meanings()) >= 3

    def _reset(self):
        """Reset the wrapped env, then step actions 1 and 2.

        Returns the observation the caller should start the episode from.
        """
        self.env.reset()
        obs, _, done, _ = self.env.step(1)
        if done:
            # obs is overwritten by the next step, so the return value of
            # this reset is not needed.
            self.env.reset()
        obs, _, done, _ = self.env.step(2)
        if done:
            # Hand back the fresh post-reset frame rather than the terminal
            # frame of the episode that just ended (same as NoopResetEnv).
            obs = self.env.reset()
        return obs
class MaxAndSkipEnv(gym.Wrapper):
    """Repeat each action for `skip` inner steps and max-pool the newest raw frames."""

    def __init__(self, env, skip=4):
        """Wrap `env` so that only every `skip`-th frame is returned."""
        gym.Wrapper.__init__(self, env)
        self.viewer = None
        self._skip = skip
        # Holds at most the two most recent raw observations; the
        # element-wise max over them is what step/render hand out.
        self._obs_buffer = deque(maxlen=2)

    def _pooled_frame(self):
        """Element-wise maximum over the buffered raw observations."""
        return np.max(np.stack(self._obs_buffer), axis=0)

    def _step(self, action):
        """Apply `action` up to `skip` times, summing rewards; stop early on done."""
        reward_sum = 0.0
        done = None
        for _ in range(self._skip):
            frame, reward, done, info = self.env.step(action)
            self._obs_buffer.append(frame)
            reward_sum += reward
            if done:
                break
        return self._pooled_frame(), reward_sum, done, info

    def _reset(self):
        """Empty the frame buffer and seed it with the inner env's first observation."""
        self._obs_buffer.clear()
        first_frame = self.env.reset()
        self._obs_buffer.append(first_frame)
        return first_frame

    def _render(self, mode='human', close=False):
        """Render the max-pooled frame; in 'human' mode also show it in a viewer."""
        if close:
            return
        if mode != 'human':
            return self._pooled_frame()
        # Imported lazily so that headless runs never touch the viewer module.
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.SimpleImageViewer()
        self.viewer.imshow(self._pooled_frame())
        return self._pooled_frame()
def wrap_deepmind(env, episode_life=False, skip=4, stack_frames=4, noop_max=30, noops=None, show_warped=False):
    """Configure environment for DeepMind-style Atari.

    Wrappers are applied in this order: EpisodicLifeEnv (optional),
    NoopResetEnv, MaxAndSkipEnv (optional), FireResetEnv (optional),
    WarpFrame, FrameStack (optional), ScaledFloatFrame.

    Args:
        env: the raw gym Atari environment.
        episode_life: if true, wrap with EpisodicLifeEnv.
        skip: number of inner steps per action; MaxAndSkipEnv is only
            applied when this is greater than 1.
        stack_frames: number of frames to stack; FrameStack is only applied
            when this is greater than 1.
        noop_max: upper bound on the random number of no-ops at reset.
        noops: if truthy, fixes the number of reset no-ops instead of
            sampling it.
        show_warped: forwarded to WarpFrame.

    Returns:
        The fully wrapped environment.
    """
    if episode_life:
        env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, noop_max=noop_max)
    if noops:
        env.override_num_noops = noops
    if skip > 1:
        assert 'NoFrameskip' in env.spec.id  # required for DeepMind-style skip
        # Honour the caller's `skip`; it used to be hard-coded to 4 here.
        env = MaxAndSkipEnv(env, skip=skip)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env, show_warped=show_warped)
    if stack_frames > 1:
        env = FrameStack(env, stack_frames)
    env = ScaledFloatFrame(env)
    return env
def retry_get(pipe, key, tries=300, base_delay=4.):
    """Fetch `key` from `pipe`, polling until every requested value exists.

    A list/tuple `key` is fetched with `pipe.mget` and the list of values is
    returned once none of them is None; any other `key` is fetched with
    `pipe.get`. Between attempts the call sleeps for `base_delay` scaled by a
    PID-derived factor between 1 and 2.

    Raises:
        RuntimeError: if the value(s) are still missing after `tries` attempts.
    """
    wants_many = isinstance(key, (list, tuple))
    final_attempt = tries - 1
    for attempt in range(tries):
        if wants_many:
            fetched = pipe.mget(key)
            if not any(item is None for item in fetched):
                return fetched
        else:
            fetched = pipe.get(key)
            if fetched is not None:
                return fetched
        if attempt == final_attempt:
            # Out of attempts: skip the pointless sleep and fall through to the error.
            break
        delay = base_delay * (1 + (os.getpid() % 10) / 9)
        logger.warning('{} not set. Retrying after {:.2f} sec ({}/{})'.format(key, delay, attempt + 2, tries))
        time.sleep(delay)
    raise RuntimeError('{} not set'.format(key))
class RelayClient:
    """
    Receives and stores task broadcasts from the master
    Batches and pushes results from workers to the master
    """

    def __init__(self, master_redis_cfg, relay_redis_cfg):
        """Connect to the master Redis and to the local relay Redis.

        Both configs are passed to `retry_connect`.
        """
        self.master_redis = retry_connect(master_redis_cfg)
        logger.info('[relay] Connected to master: {}'.format(self.master_redis))
        self.local_redis = retry_connect(relay_redis_cfg)
        logger.info('[relay] Connected to relay: {}'.format(self.local_redis))
        # Number of results forwarded to the master since the last task was received.
        self.results_published = 0

    def run(self):
        """Mirror experiment/task state locally, then forward results forever.

        This method never returns: after the initial sync and the pubsub
        subscription it loops on the local results list.
        """
        # Initialization: read exp and latest task from master
        self.local_redis.set(EXP_KEY, retry_get(self.master_redis, EXP_KEY))
        self._declare_task_local(*retry_get(self.master_redis, (TASK_ID_KEY, TASK_DATA_KEY)))

        # Start subscribing to tasks
        # Each published message carries a serialized (task_id, task_data) pair,
        # which the handler unpacks straight into _declare_task_local.
        p = self.master_redis.pubsub(ignore_subscribe_messages=True)
        p.subscribe(**{TASK_CHANNEL: lambda msg: self._declare_task_local(*deserialize(msg['data']))})
        p.run_in_thread(sleep_time=0.001)

        # Loop on RESULTS_KEY and push to master
        batch_sizes, last_print_time = deque(maxlen=20), time.time()  # for logging
        while True:
            results = []
            start_time = curr_time = time.time()
            # Collect results for a ~1 ms window; the loop body always runs at
            # least once, so every batch holds at least one result.
            while curr_time - start_time < 0.001:
                results.append(self.local_redis.blpop(RESULTS_KEY)[1])
                curr_time = time.time()
            self.results_published += len(results)
            # Results are forwarded as the raw bytes popped locally (no re-serialization).
            self.master_redis.rpush(RESULTS_KEY, *results)
            # Log
            batch_sizes.append(len(results))
            if curr_time - last_print_time > 5.0:
                logger.info('[relay] Average batch size {:.3f} ({} total)'.format(sum(batch_sizes) / len(batch_sizes), self.results_published))
                last_print_time = curr_time

    def _declare_task_local(self, task_id, task_data):
        """Store a new task in the local Redis and reset the published-results counter.

        Called once directly from `run` and afterwards as the pubsub message handler.
        """
        logger.info('[relay] Received task {}'.format(task_id))
        self.results_published = 0
        self.local_redis.mset({TASK_ID_KEY: task_id, TASK_DATA_KEY: task_data})
class RunningStat(object):
    """Running mean/std tracker backed by a sum, a sum of squares and a count."""

    def __init__(self, shape, eps):
        """Start with zero sum, and with sum of squares and count both equal to `eps`."""
        self.count = eps
        self.sum = np.zeros(shape, dtype=np.float32)
        self.sumsq = np.full_like(self.sum, eps)

    def increment(self, s, ssq, c):
        """Add a batch's sum `s`, sum of squares `ssq` and sample count `c`."""
        np.add(self.sum, s, out=self.sum)
        np.add(self.sumsq, ssq, out=self.sumsq)
        self.count += c

    @property
    def mean(self):
        """Accumulated sum divided by the accumulated count."""
        return self.sum / self.count

    @property
    def std(self):
        """Square root of the variance estimate, with the variance floored at 1e-2."""
        second_moment = self.sumsq / self.count
        variance = second_moment - np.square(self.mean)
        return np.sqrt(np.maximum(variance, 1e-2))

    def set_from_init(self, init_mean, init_std, init_count):
        """Overwrite the accumulators so `mean`/`std` reproduce the given values."""
        second_moment = np.square(init_mean) + np.square(init_std)
        self.sum[:] = init_mean * init_count
        self.sumsq[:] = second_moment * init_count
        self.count = init_count
def compute_ranks(x):
    """
    Returns ranks in [0, len(x))
    Note: This is different from scipy.stats.rankdata, which returns ranks in [1, len(x)].
    """
    assert x.ndim == 1
    ranks = np.empty(len(x), dtype=int)
    # Scatter 0..n-1 into the positions given by the sort order, so the
    # smallest element gets rank 0 and the largest gets rank n-1.
    ranks[x.argsort()] = np.arange(len(x))
    return ranks


def compute_centered_ranks(x):
    """Return the ranks of `x`'s entries rescaled to the interval [-0.5, 0.5].

    Ranks are taken over the flattened array and the result has `x`'s shape
    and dtype float32. With zero or one element there is nothing to rescale:
    the result is empty or a single 0, instead of the NaN that dividing by
    `x.size - 1 == 0` would give.
    """
    y = compute_ranks(x.ravel()).reshape(x.shape).astype(np.float32)
    if x.size <= 1:
        # A lone element has rank 0, which is already the centered value.
        return y
    y /= (x.size - 1)
    y -= .5
    return y
def setup(exp, single_threaded):
    """Build the config, environment, TensorFlow session and policy for an experiment.

    Args:
        exp: experiment dict with 'config', 'env_id' and 'policy'
            ('type' and 'args') entries.
        single_threaded: forwarded to `make_session`.

    Returns:
        A `(config, env, sess, policy)` tuple.
    """
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    env = gym.make(exp['env_id'])

    policy_spec = exp['policy']
    policy_type = policy_spec['type']
    if policy_type == "ESAtariPolicy":
        # Only this policy type gets the DeepMind-style Atari wrappers here.
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)

    sess = make_session(single_threaded=single_threaded)

    # The policy class is looked up by name in the policies module.
    policy_cls = getattr(policies, exp['policy']['type'])
    policy = policy_cls(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
import tabular_logger as tlogger + logger.info('Tabular logging to {}'.format(log_dir)) + tlogger.start(log_dir) + config, env, sess, policy = setup(exp, single_threaded=False) + master = MasterClient(master_redis_cfg) + theta = policy.get_trainable_flat() + optimizer = {'sgd': SGD, 'adam': Adam}[exp['optimizer']['type']](theta, **exp['optimizer']['args']) + noise = SharedNoiseTable() + rs = np.random.RandomState() + + if policy.needs_ob_stat: + ob_stat = RunningStat( + env.observation_space.shape, + eps=1e-2 # eps to prevent dividing by zero at the beginning when computing mean/stdev + ) + + if policy.needs_ref_batch: + ref_batch = get_ref_batch(env, batch_size=128) + policy.set_ref_batch(ref_batch) + + + if 'init_from' in exp['policy']: + logger.info('Initializing weights from {}'.format(exp['policy']['init_from'])) + policy.initialize_from(exp['policy']['init_from'], ob_stat) + + if isinstance(config.episode_cutoff_mode, int): + tslimit, incr_tslimit_threshold, tslimit_incr_ratio, tslimit_max = config.episode_cutoff_mode, None, None, config.episode_cutoff_mode + adaptive_tslimit = False + + elif config.episode_cutoff_mode.startswith('adaptive:'): + _, args = config.episode_cutoff_mode.split(':') + arg0, arg1, arg2, arg3 = args.split(',') + tslimit, incr_tslimit_threshold, tslimit_incr_ratio, tslimit_max = int(arg0), float(arg1), float(arg2), float(arg3) + adaptive_tslimit = True + logger.info( + 'Starting timestep limit set to {}. When {}% of rollouts hit the limit, it will be increased by {}. 
The maximum timestep limit is {}'.format( + tslimit, incr_tslimit_threshold * 100, tslimit_incr_ratio, tslimit_max)) + + elif config.episode_cutoff_mode == 'env_default': + tslimit, incr_tslimit_threshold, tslimit_incr_ratio, tslimit_max = None, None, None, None + adaptive_tslimit = False + else: + raise NotImplementedError(config.episode_cutoff_mode) + + episodes_so_far = 0 + timesteps_so_far = 0 + tstart = time.time() + master.declare_experiment(exp) + + while True: + step_tstart = time.time() + theta = policy.get_trainable_flat() + assert theta.dtype == np.float32 + + curr_task_id = master.declare_task(Task( + params=theta, + ob_mean=ob_stat.mean if policy.needs_ob_stat else None, + ob_std=ob_stat.std if policy.needs_ob_stat else None, + ref_batch=ref_batch if policy.needs_ref_batch else None, + timestep_limit=tslimit + )) + tlogger.log('********** Iteration {} **********'.format(curr_task_id)) + + # Pop off results for the current task + curr_task_results, eval_rets, eval_lens, worker_ids = [], [], [], [] + num_results_skipped, num_episodes_popped, num_timesteps_popped, ob_count_this_batch = 0, 0, 0, 0 + while num_episodes_popped < config.episodes_per_batch or num_timesteps_popped < config.timesteps_per_batch: + # Wait for a result + task_id, result = master.pop_result() + assert isinstance(task_id, int) and isinstance(result, Result) + assert (result.eval_return is None) == (result.eval_length is None) + worker_ids.append(result.worker_id) + + if result.eval_length is not None: + # This was an eval job + episodes_so_far += 1 + timesteps_so_far += result.eval_length + # Store the result only for current tasks + if task_id == curr_task_id: + eval_rets.append(result.eval_return) + eval_lens.append(result.eval_length) + else: + assert (result.noise_inds_n.ndim == 1 and + result.returns_n2.shape == result.lengths_n2.shape == (len(result.noise_inds_n), 2)) + assert result.returns_n2.dtype == np.float32 + # Store results only for current tasks + if task_id == 
curr_task_id: + # Update counts + result_num_eps = result.lengths_n2.size + result_num_timesteps = result.lengths_n2.sum() + episodes_so_far += result_num_eps + timesteps_so_far += result_num_timesteps + + curr_task_results.append(result) + num_episodes_popped += result_num_eps + num_timesteps_popped += result_num_timesteps + # Update ob stats + if policy.needs_ob_stat and result.ob_count > 0: + ob_stat.increment(result.ob_sum, result.ob_sumsq, result.ob_count) + ob_count_this_batch += result.ob_count + else: + num_results_skipped += 1 + + # Compute skip fraction + frac_results_skipped = num_results_skipped / (num_results_skipped + len(curr_task_results)) + if num_results_skipped > 0: + logger.warning('Skipped {} out of date results ({:.2f}%)'.format( + num_results_skipped, 100. * frac_results_skipped)) + + # Assemble results + noise_inds_n = np.concatenate([r.noise_inds_n for r in curr_task_results]) + returns_n2 = np.concatenate([r.returns_n2 for r in curr_task_results]) + lengths_n2 = np.concatenate([r.lengths_n2 for r in curr_task_results]) + signreturns_n2 = np.concatenate([r.signreturns_n2 for r in curr_task_results]) + + assert noise_inds_n.shape[0] == returns_n2.shape[0] == lengths_n2.shape[0] + # Process returns + if config.return_proc_mode == 'centered_rank': + proc_returns_n2 = compute_centered_ranks(returns_n2) + elif config.return_proc_mode == 'sign': + proc_returns_n2 = signreturns_n2 + elif config.return_proc_mode == 'centered_sign_rank': + proc_returns_n2 = compute_centered_ranks(signreturns_n2) + else: + raise NotImplementedError(config.return_proc_mode) + + # Compute and take step + g, count = batched_weighted_sum( + proc_returns_n2[:, 0] - proc_returns_n2[:, 1], + (noise.get(idx, policy.num_params) for idx in noise_inds_n), + batch_size=500 + ) + g /= returns_n2.size + assert g.shape == (policy.num_params,) and g.dtype == np.float32 and count == len(noise_inds_n) + update_ratio, theta = optimizer.update(-g + config.l2coeff * theta) + + #updating 
policy + policy.set_trainable_flat(theta) + + # Update ob stat (we're never running the policy in the master, but we might be snapshotting the policy) + if policy.needs_ob_stat: + policy.set_ob_stat(ob_stat.mean, ob_stat.std) + + # Update number of steps to take + if adaptive_tslimit and (lengths_n2 == tslimit).mean() >= incr_tslimit_threshold: + old_tslimit = tslimit + tslimit = min(int(tslimit_incr_ratio * tslimit), tslimit_max) + logger.info('Increased timestep limit from {} to {}'.format(old_tslimit, tslimit)) + + step_tend = time.time() + tlogger.record_tabular("EpRewMean", returns_n2.mean()) + tlogger.record_tabular("EpRewStd", returns_n2.std()) + tlogger.record_tabular("EpLenMean", lengths_n2.mean()) + + tlogger.record_tabular("EvalEpRewMean", np.nan if not eval_rets else np.mean(eval_rets)) + tlogger.record_tabular("EvalEpRewMedian", np.nan if not eval_rets else np.median(eval_rets)) + tlogger.record_tabular("EvalEpRewStd", np.nan if not eval_rets else np.std(eval_rets)) + tlogger.record_tabular("EvalEpLenMean", np.nan if not eval_rets else np.mean(eval_lens)) + tlogger.record_tabular("EvalPopRank", np.nan if not eval_rets else ( + np.searchsorted(np.sort(returns_n2.ravel()), eval_rets).mean() / returns_n2.size)) + tlogger.record_tabular("EvalEpCount", len(eval_rets)) + + tlogger.record_tabular("Norm", float(np.square(policy.get_trainable_flat()).sum())) + tlogger.record_tabular("GradNorm", float(np.square(g).sum())) + tlogger.record_tabular("UpdateRatio", float(update_ratio)) + + tlogger.record_tabular("EpisodesThisIter", lengths_n2.size) + tlogger.record_tabular("EpisodesSoFar", episodes_so_far) + tlogger.record_tabular("TimestepsThisIter", lengths_n2.sum()) + tlogger.record_tabular("TimestepsSoFar", timesteps_so_far) + + num_unique_workers = len(set(worker_ids)) + tlogger.record_tabular("UniqueWorkers", num_unique_workers) + tlogger.record_tabular("UniqueWorkersFrac", num_unique_workers / len(worker_ids)) + tlogger.record_tabular("ResultsSkippedFrac", 
def rollout_and_update_ob_stat(policy, env, timestep_limit, rs, task_ob_stat, calc_obstat_prob):
    """
    Run one rollout and, with probability `calc_obstat_prob`, record its
    observations in `task_ob_stat`.

    Observations are only saved when the policy needs observation
    statistics; in that case their per-dimension sum, sum of squares and
    count are passed to `task_ob_stat.increment`.

    Returns `(rollout_rews, rollout_len, rollout_nov)` as produced by
    `policy.rollout`.
    """
    if policy.needs_ob_stat and calc_obstat_prob != 0 and rs.rand() < calc_obstat_prob:
        rollout_rews, rollout_len, obs, rollout_nov = policy.rollout(
            env, timestep_limit=timestep_limit, save_obs=True, random_stream=rs)
        task_ob_stat.increment(obs.sum(axis=0), np.square(obs).sum(axis=0), len(obs))
    else:
        rollout_rews, rollout_len, rollout_nov = policy.rollout(env, timestep_limit=timestep_limit, random_stream=rs)
    return rollout_rews, rollout_len, rollout_nov


def run_worker(master_redis_cfg, relay_redis_cfg, noise, *, min_task_runtime=.2):
    """
    Run the ES worker loop; this function never returns.

    For every fetched task the worker either performs one evaluation
    rollout with the unperturbed parameters (with probability
    `config.eval_prob`) or keeps running antithetic (+noise / -noise)
    rollout pairs until at least one pair is done and `min_task_runtime`
    seconds have elapsed, then pushes a single `Result`.
    """
    logger.info('run_worker: {}'.format(locals()))
    assert isinstance(noise, SharedNoiseTable)
    worker = WorkerClient(relay_redis_cfg, master_redis_cfg)
    exp = worker.get_experiment()
    config, env, _, policy = setup(exp, single_threaded=True)
    rs = np.random.RandomState()
    worker_id = rs.randint(2 ** 31)

    assert policy.needs_ob_stat == (config.calc_obstat_prob != 0)

    while True:
        task_id, task_data = worker.get_current_task()
        task_tstart = time.time()
        assert isinstance(task_id, int) and isinstance(task_data, Task)

        if policy.needs_ob_stat:
            policy.set_ob_stat(task_data.ob_mean, task_data.ob_std)

        if policy.needs_ref_batch:
            policy.set_ref_batch(task_data.ref_batch)

        if rs.rand() < config.eval_prob:
            # Evaluation: noiseless weights and noiseless actions
            policy.set_trainable_flat(task_data.params)
            eval_rews, eval_length, _ = policy.rollout(env, timestep_limit=task_data.timestep_limit)
            eval_return = eval_rews.sum()
            logger.info('Eval result: task={} return={:.3f} length={}'.format(task_id, eval_return, eval_length))
            worker.push_result(task_id, Result(
                worker_id=worker_id,
                noise_inds_n=None,
                returns_n2=None,
                signreturns_n2=None,
                lengths_n2=None,
                eval_return=eval_return,
                eval_length=eval_length,
                ob_sum=None,
                ob_sumsq=None,
                ob_count=None
            ))
        else:
            # Rollouts with noise
            noise_inds, returns, signreturns, lengths = [], [], [], []
            task_ob_stat = RunningStat(env.observation_space.shape, eps=0.)  # eps=0 because we're incrementing only

            # Always do at least one pair, then keep going until the task
            # has used up its minimum runtime.
            while not noise_inds or time.time() - task_tstart < min_task_runtime:
                noise_idx = noise.sample_index(rs, policy.num_params)
                v = config.noise_stdev * noise.get(noise_idx, policy.num_params)

                policy.set_trainable_flat(task_data.params + v)
                rews_pos, len_pos, _ = rollout_and_update_ob_stat(
                    policy, env, task_data.timestep_limit, rs, task_ob_stat, config.calc_obstat_prob)

                policy.set_trainable_flat(task_data.params - v)
                rews_neg, len_neg, _ = rollout_and_update_ob_stat(
                    policy, env, task_data.timestep_limit, rs, task_ob_stat, config.calc_obstat_prob)

                signreturns.append([np.sign(rews_pos).sum(), np.sign(rews_neg).sum()])
                noise_inds.append(noise_idx)
                returns.append([rews_pos.sum(), rews_neg.sum()])
                lengths.append([len_pos, len_neg])

            worker.push_result(task_id, Result(
                worker_id=worker_id,
                noise_inds_n=np.array(noise_inds),
                returns_n2=np.array(returns, dtype=np.float32),
                signreturns_n2=np.array(signreturns, dtype=np.float32),
                lengths_n2=np.array(lengths, dtype=np.int32),
                eval_return=None,
                eval_length=None,
                ob_sum=None if task_ob_stat.count == 0 else task_ob_stat.sum,
                ob_sumsq=None if task_ob_stat.count == 0 else task_ob_stat.sumsq,
                ob_count=task_ob_stat.count
            ))
task_ob_stat.sumsq, + ob_count=task_ob_stat.count + )) diff --git a/es_distributed/ga.py b/es_distributed/ga.py new file mode 100644 index 00000000..d781a32d --- /dev/null +++ b/es_distributed/ga.py @@ -0,0 +1,282 @@ +import logging +import time +from collections import namedtuple + +import numpy as np + +from .dist import MasterClient, WorkerClient +from .es import * + + +def setup(exp, single_threaded): + import gym + gym.undo_logger_setup() + from . import policies, tf_util + + config = Config(**exp['config']) + env = gym.make(exp['env_id']) + if exp['env_id'].endswith('NoFrameskip-v4'): + from .atari_wrappers import wrap_deepmind + env = wrap_deepmind(env) + sess = make_session(single_threaded=single_threaded) + policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args']) + tf_util.initialize() + return config, env, sess, policy + + +def rollout_and_update_ob_stat(policy, env, timestep_limit, rs, task_ob_stat, calc_obstat_prob): + if policy.needs_ob_stat and calc_obstat_prob != 0 and rs.rand() < calc_obstat_prob: + rollout_rews, rollout_len, obs = policy.rollout( + env, timestep_limit=timestep_limit, save_obs=True, random_stream=rs) + task_ob_stat.increment(obs.sum(axis=0), np.square(obs).sum(axis=0), len(obs)) + else: + rollout_rews, rollout_len = policy.rollout(env, timestep_limit=timestep_limit, random_stream=rs) + return rollout_rews, rollout_len + +GATask = namedtuple('GATask', ['params', 'population', 'ob_mean', 'ob_std', 'timestep_limit']) +def run_master(master_redis_cfg, log_dir, exp): + logger.info('run_master: {}'.format(locals())) + from .optimizers import SGD, Adam + from . 
import tabular_logger as tlogger + logger.info('Tabular logging to {}'.format(log_dir)) + tlogger.start(log_dir) + config, env, sess, policy = setup(exp, single_threaded=False) + master = MasterClient(master_redis_cfg) + noise = SharedNoiseTable() + rs = np.random.RandomState() + + if isinstance(config.episode_cutoff_mode, int): + tslimit, incr_tslimit_threshold, tslimit_incr_ratio = config.episode_cutoff_mode, None, None + adaptive_tslimit = False + elif config.episode_cutoff_mode.startswith('adaptive:'): + _, args = config.episode_cutoff_mode.split(':') + arg0, arg1, arg2 = args.split(',') + tslimit, incr_tslimit_threshold, tslimit_incr_ratio = int(arg0), float(arg1), float(arg2) + adaptive_tslimit = True + logger.info( + 'Starting timestep limit set to {}. When {}% of rollouts hit the limit, it will be increased by {}'.format( + tslimit, incr_tslimit_threshold * 100, tslimit_incr_ratio)) + elif config.episode_cutoff_mode == 'env_default': + tslimit, incr_tslimit_threshold, tslimit_incr_ratio = None, None, None + adaptive_tslimit = False + else: + raise NotImplementedError(config.episode_cutoff_mode) + + episodes_so_far = 0 + timesteps_so_far = 0 + tstart = time.time() + master.declare_experiment(exp) + best_score = float('-inf') + population = [] + population_size = exp['population_size'] + num_elites = exp['num_elites'] + population_score = np.array([]) + + while True: + step_tstart = time.time() + theta = policy.get_trainable_flat() + assert theta.dtype == np.float32 + + curr_task_id = master.declare_task(GATask( + params=theta, + population=population, + ob_mean=ob_stat.mean if policy.needs_ob_stat else None, + ob_std=ob_stat.std if policy.needs_ob_stat else None, + timestep_limit=tslimit + )) + + tlogger.log('********** Iteration {} **********'.format(curr_task_id)) + + # Pop off results for the current task + curr_task_results, eval_rets, eval_lens, worker_ids = [], [], [], [] + num_results_skipped, num_episodes_popped, num_timesteps_popped, 
ob_count_this_batch = 0, 0, 0, 0 + while num_episodes_popped < config.episodes_per_batch or num_timesteps_popped < config.timesteps_per_batch: + # Wait for a result + task_id, result = master.pop_result() + assert isinstance(task_id, int) and isinstance(result, Result) + assert (result.eval_return is None) == (result.eval_length is None) + worker_ids.append(result.worker_id) + + if result.eval_length is not None: + # This was an eval job + episodes_so_far += 1 + timesteps_so_far += result.eval_length + # Store the result only for current tasks + if task_id == curr_task_id: + eval_rets.append(result.eval_return) + eval_lens.append(result.eval_length) + else: + assert result.returns_n2.dtype == np.float32 + # Store results only for current tasks + if task_id == curr_task_id: + # Update counts + result_num_eps = result.lengths_n2.size + result_num_timesteps = result.lengths_n2.sum() + episodes_so_far += result_num_eps + timesteps_so_far += result_num_timesteps + + curr_task_results.append(result) + num_episodes_popped += result_num_eps + num_timesteps_popped += result_num_timesteps + # Update ob stats + if policy.needs_ob_stat and result.ob_count > 0: + ob_stat.increment(result.ob_sum, result.ob_sumsq, result.ob_count) + ob_count_this_batch += result.ob_count + else: + num_results_skipped += 1 + + # Compute skip fraction + frac_results_skipped = num_results_skipped / (num_results_skipped + len(curr_task_results)) + if num_results_skipped > 0: + logger.warning('Skipped {} out of date results ({:.2f}%)'.format( + num_results_skipped, 100. 
* frac_results_skipped)) + + # Assemble results + elite + noise_inds_n = list(population[:num_elites]) + returns_n2 = list(population_score[:num_elites]) + for r in curr_task_results: + noise_inds_n.extend(r.noise_inds_n) + returns_n2.extend(r.returns_n2) + noise_inds_n = np.array(noise_inds_n) + returns_n2 = np.array(returns_n2) + lengths_n2 = np.array([r.lengths_n2 for r in curr_task_results]) + # Process returns + idx = np.argpartition(returns_n2, (-population_size, -1))[-1:-population_size-1:-1] + population = noise_inds_n[idx] + population_score = returns_n2[idx] + assert len(population) == population_size + assert np.max(returns_n2) == population_score[0] + + print('Elite: {} score: {}'.format(population[0], population_score[0])) + policy.set_trainable_flat(noise.get(population[0][0], policy.num_params)) + policy.reinitialize() + v = policy.get_trainable_flat() + + for seed in population[0][1:]: + v += config.noise_stdev * noise.get(seed, policy.num_params) + policy.set_trainable_flat(v) + + # Update number of steps to take + if adaptive_tslimit and (lengths_n2 == tslimit).mean() >= incr_tslimit_threshold: + old_tslimit = tslimit + tslimit = int(tslimit_incr_ratio * tslimit) + logger.info('Increased timestep limit from {} to {}'.format(old_tslimit, tslimit)) + + step_tend = time.time() + tlogger.record_tabular("EpRewMax", returns_n2.max()) + tlogger.record_tabular("EpRewMean", returns_n2.mean()) + tlogger.record_tabular("EpRewStd", returns_n2.std()) + tlogger.record_tabular("EpLenMean", lengths_n2.mean()) + + tlogger.record_tabular("EvalEpRewMean", np.nan if not eval_rets else np.mean(eval_rets)) + tlogger.record_tabular("EvalEpRewMedian", np.nan if not eval_rets else np.median(eval_rets)) + tlogger.record_tabular("EvalEpRewStd", np.nan if not eval_rets else np.std(eval_rets)) + tlogger.record_tabular("EvalEpLenMean", np.nan if not eval_rets else np.mean(eval_lens)) + tlogger.record_tabular("EvalPopRank", np.nan if not eval_rets else ( + 
def run_worker(master_redis_cfg, relay_redis_cfg, noise, *, min_task_runtime=.2):
    """
    Run the GA worker loop; this function never returns.

    For every fetched `GATask` the worker either performs one evaluation
    rollout with the task's parameters (with probability
    `config.eval_prob`) or keeps creating and evaluating individuals until
    at least one is done and `min_task_runtime` seconds have elapsed, then
    pushes a single `Result`.

    A new individual is a randomly chosen parent from `task_data.population`
    with one freshly sampled noise index appended, or a single sampled index
    when the population is empty.
    """
    logger.info('run_worker: {}'.format(locals()))
    assert isinstance(noise, SharedNoiseTable)
    # NOTE(review): es.run_worker calls WorkerClient(relay_redis_cfg,
    # master_redis_cfg); the argument order here is the opposite. One of the
    # two call sites is presumably wrong - confirm against WorkerClient.
    worker = WorkerClient(master_redis_cfg, relay_redis_cfg)
    exp = worker.get_experiment()
    config, env, _, policy = setup(exp, single_threaded=True)
    rs = np.random.RandomState()
    worker_id = rs.randint(2 ** 31)

    assert policy.needs_ob_stat == (config.calc_obstat_prob != 0)

    while True:
        task_id, task_data = worker.get_current_task()
        task_tstart = time.time()
        assert isinstance(task_id, int) and isinstance(task_data, GATask)
        if policy.needs_ob_stat:
            policy.set_ob_stat(task_data.ob_mean, task_data.ob_std)

        if rs.rand() < config.eval_prob:
            # Evaluation: noiseless weights and noiseless actions
            policy.set_trainable_flat(task_data.params)
            eval_rews, eval_length = policy.rollout(env)  # eval rollouts don't obey task_data.timestep_limit
            eval_return = eval_rews.sum()
            logger.info('Eval result: task={} return={:.3f} length={}'.format(task_id, eval_return, eval_length))
            worker.push_result(task_id, Result(
                worker_id=worker_id,
                noise_inds_n=None,
                returns_n2=None,
                signreturns_n2=None,
                lengths_n2=None,
                eval_return=eval_return,
                eval_length=eval_length,
                ob_sum=None,
                ob_sumsq=None,
                ob_count=None
            ))
        else:
            # Rollouts with noise
            noise_inds, returns, signreturns, lengths = [], [], [], []
            task_ob_stat = RunningStat(env.observation_space.shape, eps=0.)  # eps=0 because we're incrementing only

            # Always evaluate at least one individual, then keep going until
            # the task has used up its minimum runtime.
            while not noise_inds or time.time() - task_tstart < min_task_runtime:
                # len() rather than truthiness: the population may be an array.
                if len(task_data.population) > 0:
                    seeds = list(task_data.population[rs.randint(len(task_data.population))]) + [noise.sample_index(rs, policy.num_params)]
                else:
                    seeds = [noise.sample_index(rs, policy.num_params)]

                # First seed -> initial weights (via policy.reinitialize()),
                # every further seed -> one additive mutation.
                v = noise.get(seeds[0], policy.num_params)

                policy.set_trainable_flat(v)
                policy.reinitialize()
                v = policy.get_trainable_flat()

                for seed in seeds[1:]:
                    v += config.noise_stdev * noise.get(seed, policy.num_params)
                policy.set_trainable_flat(v)

                rews_pos, len_pos = rollout_and_update_ob_stat(
                    policy, env, task_data.timestep_limit, rs, task_ob_stat, config.calc_obstat_prob)
                noise_inds.append(seeds)
                returns.append(rews_pos.sum())
                signreturns.append(np.sign(rews_pos).sum())
                lengths.append(len_pos)

            worker.push_result(task_id, Result(
                worker_id=worker_id,
                noise_inds_n=noise_inds,
                returns_n2=np.array(returns, dtype=np.float32),
                signreturns_n2=np.array(signreturns, dtype=np.float32),
                lengths_n2=np.array(lengths, dtype=np.int32),
                eval_return=None,
                eval_length=None,
                ob_sum=None if task_ob_stat.count == 0 else task_ob_stat.sum,
                ob_sumsq=None if task_ob_stat.count == 0 else task_ob_stat.sumsq,
                ob_count=task_ob_stat.count
            ))
def mkdir_p(path):
    """
    Create directory `path` including missing parents (like `mkdir -p`).

    An already existing directory is not an error; an OSError is still
    raised when `path` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)


@click.group()
def cli():
    """Command group entry point; configures logging to stderr at INFO level."""
    logging.basicConfig(
        format='[%(asctime)s pid=%(process)d] %(message)s',
        level=logging.INFO,
        stream=sys.stderr)


def import_algo(name):
    """
    Return the algorithm module selected by `name`.

    Accepted names are 'es', 'ns-es', 'nsr-es', 'ga' and 'rs'. The modules
    are imported lazily so that only the requested one is loaded.

    Raises NotImplementedError for any other name.
    """
    if name == 'es':
        from . import es as algo
    elif name in ('ns-es', 'nsr-es'):
        from . import nses as algo
    elif name == 'ga':
        from . import ga as algo
    elif name == 'rs':
        from . import rs as algo
    else:
        raise NotImplementedError('Unknown algorithm: {!r}'.format(name))
    return algo
@cli.command()
@click.option('--algo')
@click.option('--exp_str')
@click.option('--exp_file')
@click.option('--master_socket_path', required=True)
@click.option('--log_dir')
def master(algo, exp_str, exp_file, master_socket_path, log_dir):
    """
    Start the master for algorithm `algo`.

    The experiment is read as JSON from exactly one of `exp_str` (inline)
    or `exp_file` (path). Logs go to `log_dir`, or to a per-process
    directory under /tmp when it is not given.
    """
    # Explicit validation instead of assert: asserts are stripped under -O.
    if (exp_str is None) == (exp_file is None):
        raise click.UsageError('Must provide exp_str xor exp_file to the master')
    if exp_str is not None:
        exp = json.loads(exp_str)
    else:
        with open(exp_file, 'r') as f:
            exp = json.load(f)
    log_dir = os.path.expanduser(log_dir) if log_dir else '/tmp/es_master_{}'.format(os.getpid())
    mkdir_p(log_dir)
    algo_module = import_algo(algo)
    algo_module.run_master({'unix_socket_path': master_socket_path}, log_dir, exp)


@cli.command()
@click.option('--algo')
@click.option('--master_host', required=True)
@click.option('--master_port', default=6379, type=int)
@click.option('--relay_socket_path', required=True)
@click.option('--num_workers', type=int, default=0)
def workers(algo, master_host, master_port, relay_socket_path, num_workers):
    """
    Fork one relay process and `num_workers` worker processes.

    `num_workers` of 0 (the default) means one worker per CPU as reported
    by `os.cpu_count()`.
    """
    # Start the relay
    master_redis_cfg = {'host': master_host, 'port': master_port}
    relay_redis_cfg = {'unix_socket_path': relay_socket_path}
    if os.fork() == 0:
        RelayClient(master_redis_cfg, relay_redis_cfg).run()
        return
    # Start the workers
    algo_module = import_algo(algo)
    # Created before forking so every worker process gets the same table.
    noise = algo_module.SharedNoiseTable()  # Workers share the same noise
    num_workers = num_workers if num_workers else os.cpu_count()
    logging.info('Spawning {} workers'.format(num_workers))
    for _ in range(num_workers):
        if os.fork() == 0:
            algo_module.run_worker(master_redis_cfg, relay_redis_cfg, noise=noise)
            return
    # NOTE(review): os.wait() returns as soon as any one child exits, so the
    # parent stops waiting while the remaining children keep running.
    # Presumably intentional - confirm.
    os.wait()


if __name__ == '__main__':
    cli()
def euclidean_distance(x, y):
    """
    Euclidean distance between two 1-D vectors of possibly different length.

    The shorter vector is compared element-wise with the matching prefix of
    the longer one; every remaining element of the longer vector is compared
    with the last element of the shorter one.
    """
    n, m = len(x), len(y)
    if n > m:
        a = np.linalg.norm(y - x[:m])
        b = np.linalg.norm(y[-1] - x[m:])
    else:
        a = np.linalg.norm(x - y[:n])
        b = np.linalg.norm(x[-1] - y[n:])
    return np.sqrt(a**2 + b**2)


def compute_novelty_vs_archive(archive, novelty_vector, k):
    """
    Mean distance from `novelty_vector` to its `k` nearest archive entries.

    `archive` is an iterable of arrays; fewer than `k` entries means the
    mean is taken over all of them.
    """
    # np.float (an alias of the builtin float) was removed from NumPy;
    # np.float64 is the same dtype.
    nov = novelty_vector.astype(np.float64)
    distances = np.array([euclidean_distance(point.astype(np.float64), nov) for point in archive])

    # Pick k nearest neighbors
    top_k_indices = distances.argsort()[:k]
    return distances[top_k_indices].mean()


def get_mean_bc(env, policy, tslimit, num_rollouts=1):
    """
    Average the novelty vectors of `num_rollouts` rollouts of `policy`.

    Each rollout is limited to `tslimit` timesteps; the third value returned
    by `policy.rollout` is taken as the novelty vector.
    """
    novelty_vectors = []
    for _ in range(num_rollouts):
        _, _, nv = policy.rollout(env, timestep_limit=tslimit)
        novelty_vectors.append(nv)
    return np.mean(novelty_vectors, axis=0)


def setup_env(exp):
    """
    Build `(config, env)` from the experiment dict.

    The environment is wrapped with `wrap_deepmind` when the policy type is
    "ESAtariPolicy".
    """
    import gym
    gym.undo_logger_setup()
    config = Config(**exp['config'])
    env = gym.make(exp['env_id'])
    if exp['policy']['type'] == "ESAtariPolicy":
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    return config, env


def setup_policy(env, exp, single_threaded):
    """
    Create a session and a policy of type `exp['policy']['type']` for `env`.

    Returns `(sess, policy)`.
    """
    from . import policies, tf_util
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return sess, policy
import tabular_logger as tlogger + config, env = setup_env(exp) + algo_type = exp['algo_type'] + master = MasterClient(master_redis_cfg) + noise = SharedNoiseTable() + rs = np.random.RandomState() + ref_batch = get_ref_batch(env, batch_size=128) + + pop_size = int(exp['novelty_search']['population_size']) + num_rollouts = int(exp['novelty_search']['num_rollouts']) + theta_dict = {} + optimizer_dict = {} + obstat_dict = {} + curr_parent = 0 + + if isinstance(config.episode_cutoff_mode, int): + tslimit, incr_tslimit_threshold, tslimit_incr_ratio, tslimit_max = config.episode_cutoff_mode, None, None, config.episode_cutoff_mode + adaptive_tslimit = False + + elif config.episode_cutoff_mode.startswith('adaptive:'): + _, args = config.episode_cutoff_mode.split(':') + arg0, arg1, arg2, arg3 = args.split(',') + tslimit, incr_tslimit_threshold, tslimit_incr_ratio, tslimit_max = int(arg0), float(arg1), float(arg2), float(arg3) + adaptive_tslimit = True + logger.info( + 'Starting timestep limit set to {}. When {}% of rollouts hit the limit, it will be increased by {}. 
The maximum timestep limit is {}'.format( + tslimit, incr_tslimit_threshold * 100, tslimit_incr_ratio, tslimit_max)) + + elif config.episode_cutoff_mode == 'env_default': + tslimit, incr_tslimit_threshold, tslimit_incr_ratio, tslimit_max = None, None, None, None + adaptive_tslimit = False + else: + raise NotImplementedError(config.episode_cutoff_mode) + + for p in range(pop_size): + with tf.Graph().as_default(): + sess, policy = setup_policy(env, exp, single_threaded=False) + + if 'init_from' in exp['policy']: + logger.info('Initializing weights from {}'.format(exp['policy']['init_from'])) + policy.initialize_from(exp['policy']['init_from'], ob_stat) + + theta = policy.get_trainable_flat() + optimizer = {'sgd': SGD, 'adam': Adam}[exp['optimizer']['type']](theta, **exp['optimizer']['args']) + + if policy.needs_ob_stat: + ob_stat = RunningStat(env.observation_space.shape, eps=1e-2) + obstat_dict[p] = ob_stat + + if policy.needs_ref_batch: + policy.set_ref_batch(ref_batch) + + mean_bc = get_mean_bc(env, policy, tslimit_max, num_rollouts) + master.add_to_novelty_archive(mean_bc) + + theta_dict[p] = theta + optimizer_dict[p] = optimizer + + episodes_so_far = 0 + timesteps_so_far = 0 + tstart = time.time() + master.declare_experiment(exp) + + while True: + step_tstart = time.time() + + theta = theta_dict[curr_parent] + policy.set_trainable_flat(theta) + optimizer = optimizer_dict[curr_parent] + + if policy.needs_ob_stat: + ob_stat = deepcopy(obstat_dict[curr_parent]) + + assert theta.dtype == np.float32 + + curr_task_id = master.declare_task(Task( + params=theta, + ob_mean=ob_stat.mean if policy.needs_ob_stat else None, + ob_std=ob_stat.std if policy.needs_ob_stat else None, + ref_batch=ref_batch if policy.needs_ref_batch else None, + timestep_limit=tslimit + )) + tlogger.log('********** Iteration {} **********'.format(curr_task_id)) + + # Pop off results for the current task + curr_task_results, eval_rets, eval_lens, worker_ids = [], [], [], [] + num_results_skipped, 
num_episodes_popped, num_timesteps_popped, ob_count_this_batch = 0, 0, 0, 0 + while num_episodes_popped < config.episodes_per_batch or num_timesteps_popped < config.timesteps_per_batch: + # Wait for a result + task_id, result = master.pop_result() + assert isinstance(task_id, int) and isinstance(result, Result) + assert (result.eval_return is None) == (result.eval_length is None) + worker_ids.append(result.worker_id) + + if result.eval_length is not None: + # This was an eval job + episodes_so_far += 1 + timesteps_so_far += result.eval_length + # Store the result only for current tasks + if task_id == curr_task_id: + eval_rets.append(result.eval_return) + eval_lens.append(result.eval_length) + else: + assert (result.noise_inds_n.ndim == 1 and + result.returns_n2.shape == result.lengths_n2.shape == (len(result.noise_inds_n), 2)) + assert result.returns_n2.dtype == np.float32 + # Update counts + result_num_eps = result.lengths_n2.size + result_num_timesteps = result.lengths_n2.sum() + episodes_so_far += result_num_eps + timesteps_so_far += result_num_timesteps + # Store results only for current tasks + if task_id == curr_task_id: + curr_task_results.append(result) + num_episodes_popped += result_num_eps + num_timesteps_popped += result_num_timesteps + # Update ob stats + if policy.needs_ob_stat and result.ob_count > 0: + ob_stat.increment(result.ob_sum, result.ob_sumsq, result.ob_count) + ob_count_this_batch += result.ob_count + else: + num_results_skipped += 1 + + # Compute skip fraction + frac_results_skipped = num_results_skipped / (num_results_skipped + len(curr_task_results)) + if num_results_skipped > 0: + logger.warning('Skipped {} out of date results ({:.2f}%)'.format( + num_results_skipped, 100. 
* frac_results_skipped)) + + # Assemble results + noise_inds_n = np.concatenate([r.noise_inds_n for r in curr_task_results]) + returns_n2 = np.concatenate([r.returns_n2 for r in curr_task_results]) + lengths_n2 = np.concatenate([r.lengths_n2 for r in curr_task_results]) + signreturns_n2 = np.concatenate([r.signreturns_n2 for r in curr_task_results]) + + assert noise_inds_n.shape[0] == returns_n2.shape[0] == lengths_n2.shape[0] + # Process returns + if config.return_proc_mode == 'centered_rank': + proc_returns_n2 = compute_centered_ranks(returns_n2) + elif config.return_proc_mode == 'sign': + proc_returns_n2 = signreturns_n2 + elif config.return_proc_mode == 'centered_sign_rank': + proc_returns_n2 = compute_centered_ranks(signreturns_n2) + else: + raise NotImplementedError(config.return_proc_mode) + + if algo_type == "nsr": + rew_ranks = compute_centered_ranks(returns_n2) + proc_returns_n2 = (rew_ranks + proc_returns_n2) / 2.0 + + # Compute and take step + g, count = batched_weighted_sum( + proc_returns_n2[:, 0] - proc_returns_n2[:, 1], + (noise.get(idx, policy.num_params) for idx in noise_inds_n), + batch_size=500 + ) + g /= returns_n2.size + assert g.shape == (policy.num_params,) and g.dtype == np.float32 and count == len(noise_inds_n) + update_ratio, theta = optimizer.update(-g + config.l2coeff * theta) + + policy.set_trainable_flat(theta) + + # Update ob stat (we're never running the policy in the master, but we might be snapshotting the policy) + if policy.needs_ob_stat: + policy.set_ob_stat(ob_stat.mean, ob_stat.std) + + mean_bc = get_mean_bc(env, policy, tslimit_max, num_rollouts) + master.add_to_novelty_archive(mean_bc) + + # Update number of steps to take + if adaptive_tslimit and (lengths_n2 == tslimit).mean() >= incr_tslimit_threshold: + old_tslimit = tslimit + tslimit = min(int(tslimit_incr_ratio * tslimit), tslimit_max) + logger.info('Increased timestep limit from {} to {}'.format(old_tslimit, tslimit)) + + step_tend = time.time() + 
tlogger.record_tabular("ParentId", curr_parent) + tlogger.record_tabular("EpRewMean", returns_n2.mean()) + tlogger.record_tabular("EpRewStd", returns_n2.std()) + tlogger.record_tabular("EpLenMean", lengths_n2.mean()) + + tlogger.record_tabular("EvalEpRewMean", np.nan if not eval_rets else np.mean(eval_rets)) + tlogger.record_tabular("EvalEpRewStd", np.nan if not eval_rets else np.std(eval_rets)) + tlogger.record_tabular("EvalEpLenMean", np.nan if not eval_rets else np.mean(eval_lens)) + tlogger.record_tabular("EvalPopRank", np.nan if not eval_rets else ( + np.searchsorted(np.sort(returns_n2.ravel()), eval_rets).mean() / returns_n2.size)) + tlogger.record_tabular("EvalEpCount", len(eval_rets)) + + tlogger.record_tabular("Norm", float(np.square(policy.get_trainable_flat()).sum())) + tlogger.record_tabular("GradNorm", float(np.square(g).sum())) + tlogger.record_tabular("UpdateRatio", float(update_ratio)) + + tlogger.record_tabular("EpisodesThisIter", lengths_n2.size) + tlogger.record_tabular("EpisodesSoFar", episodes_so_far) + tlogger.record_tabular("TimestepsThisIter", lengths_n2.sum()) + tlogger.record_tabular("TimestepsSoFar", timesteps_so_far) + + num_unique_workers = len(set(worker_ids)) + tlogger.record_tabular("UniqueWorkers", num_unique_workers) + tlogger.record_tabular("UniqueWorkersFrac", num_unique_workers / len(worker_ids)) + tlogger.record_tabular("ResultsSkippedFrac", frac_results_skipped) + tlogger.record_tabular("ObCount", ob_count_this_batch) + + tlogger.record_tabular("TimeElapsedThisIter", step_tend - step_tstart) + tlogger.record_tabular("TimeElapsed", step_tend - tstart) + tlogger.dump_tabular() + + #updating population parameters + theta_dict[curr_parent] = policy.get_trainable_flat() + optimizer_dict[curr_parent] = optimizer + if policy.needs_ob_stat: + obstat_dict[curr_parent] = ob_stat + + if exp['novelty_search']['selection_method'] == "novelty_prob": + novelty_probs = [] + archive = master.get_archive() + for p in range(pop_size): + 
policy.set_trainable_flat(theta_dict[p]) + mean_bc = get_mean_bc(env, policy, tslimit_max, num_rollouts) + nov_p = compute_novelty_vs_archive(archive, mean_bc, exp['novelty_search']['k']) + novelty_probs.append(nov_p) + novelty_probs = np.array(novelty_probs) / float(np.sum(novelty_probs)) + curr_parent = np.random.choice(range(pop_size), 1, p=novelty_probs)[0] + elif exp['novelty_search']['selection_method'] == "round_robin": + curr_parent = (curr_parent + 1) % pop_size + else: + raise NotImplementedError(exp['novelty_search']['selection_method']) + + # if config.snapshot_freq != 0 and curr_task_id % config.snapshot_freq == 0: + if config.snapshot_freq != 0: + import os.path as osp + filename = 'snapshot_iter{:05d}_rew{}.h5'.format( + curr_task_id, + np.nan if not eval_rets else int(np.mean(eval_rets)) + ) + assert not osp.exists(filename) + policy.save(filename) + tlogger.log('Saved snapshot {}'.format(filename)) + +def run_worker(master_redis_cfg, relay_redis_cfg, noise, *, min_task_runtime=.2): + logger.info('run_worker: {}'.format(locals())) + assert isinstance(noise, SharedNoiseTable) + worker = WorkerClient(relay_redis_cfg, master_redis_cfg) + exp = worker.get_experiment() + config, env = setup_env(exp) + sess, policy = setup_policy(env, exp, single_threaded=False) + rs = np.random.RandomState() + worker_id = rs.randint(2 ** 31) + previous_task_id = -1 + + assert policy.needs_ob_stat == (config.calc_obstat_prob != 0) + + while True: + task_id, task_data = worker.get_current_task() + task_tstart = time.time() + assert isinstance(task_id, int) and isinstance(task_data, Task) + + if policy.needs_ob_stat: + policy.set_ob_stat(task_data.ob_mean, task_data.ob_std) + + if policy.needs_ref_batch: + policy.set_ref_batch(task_data.ref_batch) + + if task_id != previous_task_id: + archive = worker.get_archive() + previous_task_id = task_id + + if rs.rand() < config.eval_prob: + # Evaluation: noiseless weights and noiseless actions + 
policy.set_trainable_flat(task_data.params) + eval_rews, eval_length, _ = policy.rollout(env, timestep_limit=task_data.timestep_limit) + eval_return = eval_rews.sum() + logger.info('Eval result: task={} return={:.3f} length={}'.format(task_id, eval_return, eval_length)) + worker.push_result(task_id, Result( + worker_id=worker_id, + noise_inds_n=None, + returns_n2=None, + signreturns_n2=None, + lengths_n2=None, + eval_return=eval_return, + eval_length=eval_length, + ob_sum=None, + ob_sumsq=None, + ob_count=None + )) + else: + # Rollouts with noise + noise_inds, returns, signreturns, lengths = [], [], [], [] + task_ob_stat = RunningStat(env.observation_space.shape, eps=0.) # eps=0 because we're incrementing only + + while not noise_inds or time.time() - task_tstart < min_task_runtime: + noise_idx = noise.sample_index(rs, policy.num_params) + v = config.noise_stdev * noise.get(noise_idx, policy.num_params) + + policy.set_trainable_flat(task_data.params + v) + rews_pos, len_pos, nov_vec_pos = rollout_and_update_ob_stat( + policy, env, task_data.timestep_limit, rs, task_ob_stat, config.calc_obstat_prob) + + policy.set_trainable_flat(task_data.params - v) + rews_neg, len_neg, nov_vec_neg = rollout_and_update_ob_stat( + policy, env, task_data.timestep_limit, rs, task_ob_stat, config.calc_obstat_prob) + + nov_pos = compute_novelty_vs_archive(archive, nov_vec_pos, exp['novelty_search']['k']) + nov_neg = compute_novelty_vs_archive(archive, nov_vec_neg, exp['novelty_search']['k']) + + signreturns.append([nov_pos, nov_neg]) + noise_inds.append(noise_idx) + returns.append([rews_pos.sum(), rews_neg.sum()]) + lengths.append([len_pos, len_neg]) + + worker.push_result(task_id, Result( + worker_id=worker_id, + noise_inds_n=np.array(noise_inds), + returns_n2=np.array(returns, dtype=np.float32), + signreturns_n2=np.array(signreturns, dtype=np.float32), + lengths_n2=np.array(lengths, dtype=np.int32), + eval_return=None, + eval_length=None, + ob_sum=None if task_ob_stat.count == 0 else 
class Optimizer(object):
    """Base class for first-order optimizers operating on a flat parameter vector.

    Subclasses implement ``_compute_step``; ``update`` applies the step and
    keeps ``self.theta`` and the step counter ``self.t`` current.
    """

    def __init__(self, theta):
        self.theta = theta
        self.dim = len(self.theta)
        self.t = 0  # number of updates applied so far

    def update(self, globalg):
        """Apply one optimization step for gradient ``globalg``.

        Returns:
            ``(ratio, new_theta)`` where ``ratio`` is ``‖step‖ / ‖theta‖``
            measured against the parameters *before* the step. When the
            parameters are all zero the ratio is ``inf`` for a non-zero step
            and ``0.0`` for a zero step, instead of dividing by zero.
        """
        self.t += 1
        step = self._compute_step(globalg)
        step_norm = np.linalg.norm(step)
        theta_norm = np.linalg.norm(self.theta)
        if theta_norm == 0:
            # Avoid the divide-by-zero RuntimeWarning / nan of the raw quotient.
            ratio = np.inf if step_norm > 0 else 0.0
        else:
            ratio = step_norm / theta_norm
        # Rebind rather than update in place so the caller's array is untouched.
        self.theta = self.theta + step
        return ratio, self.theta

    def _compute_step(self, globalg):
        """Return the parameter delta for gradient ``globalg``."""
        raise NotImplementedError


class SGD(Optimizer):
    """Gradient descent with an exponential moving average of the gradient."""

    def __init__(self, theta, stepsize, momentum=0.9):
        super().__init__(theta)
        self.v = np.zeros(self.dim, dtype=np.float32)
        self.stepsize, self.momentum = stepsize, momentum

    def _compute_step(self, globalg):
        self.v = self.momentum * self.v + (1. - self.momentum) * globalg
        return -self.stepsize * self.v


class Adam(Optimizer):
    """Adam optimizer with bias-corrected step size."""

    def __init__(self, theta, stepsize, beta1=0.9, beta2=0.999, epsilon=1e-08):
        super().__init__(theta)
        self.stepsize = stepsize
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = np.zeros(self.dim, dtype=np.float32)  # first-moment estimate
        self.v = np.zeros(self.dim, dtype=np.float32)  # second-moment estimate

    def _compute_step(self, globalg):
        # Bias correction folded into the step size; relies on self.t >= 1,
        # which update() guarantees by incrementing before calling us.
        a = self.stepsize * np.sqrt(1 - self.beta2 ** self.t) / (1 - self.beta1 ** self.t)
        self.m = self.beta1 * self.m + (1 - self.beta1) * globalg
        self.v = self.beta2 * self.v + (1 - self.beta2) * (globalg * globalg)
        return -a * self.m / (np.sqrt(self.v) + self.epsilon)
class Policy:
    """Base class for TensorFlow policies with flat-parameter access and rollouts.

    Subclasses implement ``_initialize`` (build the graph, return its variable
    scope), ``act``, ``needs_ob_stat`` and ``set_ob_stat``.
    """

    def __init__(self, *args, **kwargs):
        # Constructor arguments are retained so save() can pickle them and
        # Load() can rebuild an identical policy.
        self.args, self.kwargs = args, kwargs
        self.scope = self._initialize(*args, **kwargs)
        # NOTE(review): tf.GraphKeys.VARIABLES looks like the pre-1.0 name of
        # GLOBAL_VARIABLES -- confirm against the pinned TensorFlow version.
        self.all_variables = tf.get_collection(tf.GraphKeys.VARIABLES, self.scope.name)

        self.trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope.name)
        self.num_params = sum(int(np.prod(v.get_shape().as_list())) for v in self.trainable_variables)
        self._setfromflat = U.SetFromFlat(self.trainable_variables)
        self._getflat = U.GetFlat(self.trainable_variables)

        logger.info('Trainable variables ({} parameters)'.format(self.num_params))
        for v in self.trainable_variables:
            shp = v.get_shape().as_list()
            logger.info('- {} shape:{} size:{}'.format(v.name, shp, np.prod(shp)))
        logger.info('All variables')
        for v in self.all_variables:
            shp = v.get_shape().as_list()
            logger.info('- {} shape:{} size:{}'.format(v.name, shp, np.prod(shp)))

        # One placeholder per variable so every variable can be assigned in a single call.
        placeholders = [tf.placeholder(v.value().dtype, v.get_shape().as_list()) for v in self.all_variables]
        self.set_all_vars = U.function(
            inputs=placeholders,
            outputs=[],
            updates=[tf.group(*[v.assign(p) for v, p in zip(self.all_variables, placeholders)])]
        )

    def reinitialize(self):
        # NOTE(review): relies on each variable exposing a `reinitialize`
        # attribute; nothing in this class sets it -- verify where it comes from.
        for v in self.trainable_variables:
            v.reinitialize.eval()

    def _initialize(self, *args, **kwargs):
        """Build the policy graph and return its variable scope."""
        raise NotImplementedError

    def save(self, filename):
        """Write all variables plus the pickled constructor arguments to an ``.h5`` file."""
        assert filename.endswith('.h5')
        with h5py.File(filename, 'w', libver='latest') as f:
            for v in self.all_variables:
                f[v.name] = v.eval()
            # TODO: it would be nice to avoid pickle, but it's convenient to pass Python objects to _initialize
            # (like Gym spaces or numpy arrays)
            f.attrs['name'] = type(self).__name__
            f.attrs['args_and_kwargs'] = np.void(pickle.dumps((self.args, self.kwargs), protocol=-1))

    @classmethod
    def Load(cls, filename, extra_kwargs=None):
        """Rebuild a policy from a file written by ``save``.

        ``extra_kwargs``, if given, override the stored constructor kwargs.
        """
        with h5py.File(filename, 'r') as f:
            # SECURITY: unpickling executes arbitrary code; only load snapshots
            # from trusted sources.
            # tobytes() replaces the deprecated ndarray.tostring() alias.
            args, kwargs = pickle.loads(f.attrs['args_and_kwargs'].tobytes())
            if extra_kwargs:
                kwargs.update(extra_kwargs)
            policy = cls(*args, **kwargs)
            policy.set_all_vars(*[f[v.name][...] for v in policy.all_variables])
        return policy

    # === Rollouts/training ===

    def rollout(self, env, *, render=False, timestep_limit=None, save_obs=False, random_stream=None):
        """
        Run one episode and return its rewards and length.

        If random_stream is provided, the rollout will take noisy actions with noise drawn from that stream.
        Otherwise, no action noise will be added.

        The episode stops after the smaller of ``timestep_limit`` and the
        environment's own TimeLimit tag; either may be absent, and with
        neither present the episode runs until the environment reports done.

        Returns ``(rews, t)`` or, with ``save_obs=True``, ``(rews, t, obs)``,
        where ``rews`` is a float32 array of per-step rewards, ``t`` the
        number of steps taken and ``obs`` the observations acted upon.
        """
        env_timestep_limit = env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps')
        # Either limit can be None; min() over None raises TypeError on Python 3.
        if timestep_limit is None:
            timestep_limit = env_timestep_limit
        elif env_timestep_limit is not None:
            timestep_limit = min(timestep_limit, env_timestep_limit)

        rews = []
        t = 0
        if save_obs:
            obs = []
        ob = env.reset()
        # A while-loop (rather than range) tolerates None and non-integer limits.
        while timestep_limit is None or t < timestep_limit:
            ac = self.act(ob[None], random_stream=random_stream)[0]
            if save_obs:
                obs.append(ob)
            ob, rew, done, _ = env.step(ac)
            rews.append(rew)
            t += 1
            if render:
                env.render()
            if done:
                break
        rews = np.array(rews, dtype=np.float32)
        if save_obs:
            return rews, t, np.array(obs)
        return rews, t

    def act(self, ob, random_stream=None):
        """Return actions for a batch of observations."""
        raise NotImplementedError

    def set_trainable_flat(self, x):
        """Assign the trainable variables from the flat vector ``x``."""
        self._setfromflat(x)

    def get_trainable_flat(self):
        """Return the trainable variables as one flat vector."""
        return self._getflat()

    @property
    def needs_ob_stat(self):
        """Whether the policy expects observation statistics via ``set_ob_stat``."""
        raise NotImplementedError

    def set_ob_stat(self, ob_mean, ob_std):
        """Install observation mean/std used by the policy."""
        raise NotImplementedError
num_bins-1 + + +class MujocoPolicy(Policy): + def _initialize(self, ob_space, ac_space, ac_bins, ac_noise_std, nonlin_type, hidden_dims, connection_type): + self.ac_space = ac_space + self.ac_bins = ac_bins + self.ac_noise_std = ac_noise_std + self.hidden_dims = hidden_dims + self.connection_type = connection_type + + assert len(ob_space.shape) == len(self.ac_space.shape) == 1 + assert np.all(np.isfinite(self.ac_space.low)) and np.all(np.isfinite(self.ac_space.high)), \ + 'Action bounds required' + + self.nonlin = {'tanh': tf.tanh, 'relu': tf.nn.relu, 'lrelu': U.lrelu, 'elu': tf.nn.elu}[nonlin_type] + + with tf.variable_scope(type(self).__name__) as scope: + # Observation normalization + ob_mean = tf.get_variable( + 'ob_mean', ob_space.shape, tf.float32, tf.constant_initializer(np.nan), trainable=False) + ob_std = tf.get_variable( + 'ob_std', ob_space.shape, tf.float32, tf.constant_initializer(np.nan), trainable=False) + in_mean = tf.placeholder(tf.float32, ob_space.shape) + in_std = tf.placeholder(tf.float32, ob_space.shape) + self._set_ob_mean_std = U.function([in_mean, in_std], [], updates=[ + tf.assign(ob_mean, in_mean), + tf.assign(ob_std, in_std), + ]) + + # Policy network + o = tf.placeholder(tf.float32, [None] + list(ob_space.shape)) + a = self._make_net(tf.clip_by_value((o - ob_mean) / ob_std, -5.0, 5.0)) + self._act = U.function([o], a) + return scope + + def _make_net(self, o): + # Process observation + if self.connection_type == 'ff': + x = o + for ilayer, hd in enumerate(self.hidden_dims): + x = self.nonlin(U.dense(x, hd, 'l{}'.format(ilayer), U.normc_initializer(1.0))) + else: + raise NotImplementedError(self.connection_type) + + # Map to action + adim, ahigh, alow = self.ac_space.shape[0], self.ac_space.high, self.ac_space.low + assert isinstance(self.ac_bins, str) + ac_bin_mode, ac_bin_arg = self.ac_bins.split(':') + + if ac_bin_mode == 'uniform': + # Uniformly spaced bins, from ac_space.low to ac_space.high + num_ac_bins = int(ac_bin_arg) + aidx_na 
= bins(x, adim, num_ac_bins, 'out') # 0 ... num_ac_bins-1 + ac_range_1a = (ahigh - alow)[None, :] + a = 1. / (num_ac_bins - 1.) * tf.to_float(aidx_na) * ac_range_1a + alow[None, :] + + elif ac_bin_mode == 'custom': + # Custom bins specified as a list of values from -1 to 1 + # The bins are rescaled to ac_space.low to ac_space.high + acvals_k = np.array(list(map(float, ac_bin_arg.split(','))), dtype=np.float32) + logger.info('Custom action values: ' + ' '.join('{:.3f}'.format(x) for x in acvals_k)) + assert acvals_k.ndim == 1 and acvals_k[0] == -1 and acvals_k[-1] == 1 + acvals_ak = ( + (ahigh - alow)[:, None] / (acvals_k[-1] - acvals_k[0]) * (acvals_k - acvals_k[0])[None, :] + + alow[:, None] + ) + + aidx_na = bins(x, adim, len(acvals_k), 'out') # values in [0, k-1] + a = tf.gather_nd( + acvals_ak, + tf.concat(2, [ + tf.tile(np.arange(adim)[None, :, None], [tf.shape(aidx_na)[0], 1, 1]), + tf.expand_dims(aidx_na, -1) + ]) # (n,a,2) + ) # (n,a) + elif ac_bin_mode == 'continuous': + a = U.dense(x, adim, 'out', U.normc_initializer(0.01)) + else: + raise NotImplementedError(ac_bin_mode) + + return a + + def act(self, ob, random_stream=None): + a = self._act(ob) + if random_stream is not None and self.ac_noise_std != 0: + a += random_stream.randn(*a.shape) * self.ac_noise_std + return a + + @property + def needs_ob_stat(self): + return True + + @property + def needs_ref_batch(self): + return False + + def set_ob_stat(self, ob_mean, ob_std): + self._set_ob_mean_std(ob_mean, ob_std) + + def initialize_from(self, filename, ob_stat=None): + """ + Initializes weights from another policy, which must have the same architecture (variable names), + but the weight arrays can be smaller than the current policy. 
+ """ + with h5py.File(filename, 'r') as f: + f_var_names = [] + f.visititems(lambda name, obj: f_var_names.append(name) if isinstance(obj, h5py.Dataset) else None) + assert set(v.name for v in self.all_variables) == set(f_var_names), 'Variable names do not match' + + init_vals = [] + for v in self.all_variables: + shp = v.get_shape().as_list() + f_shp = f[v.name].shape + assert len(shp) == len(f_shp) and all(a >= b for a, b in zip(shp, f_shp)), \ + 'This policy must have more weights than the policy to load' + init_val = v.eval() + # ob_mean and ob_std are initialized with nan, so set them manually + if 'ob_mean' in v.name: + init_val[:] = 0 + init_mean = init_val + elif 'ob_std' in v.name: + init_val[:] = 0.001 + init_std = init_val + # Fill in subarray from the loaded policy + init_val[tuple([np.s_[:s] for s in f_shp])] = f[v.name] + init_vals.append(init_val) + self.set_all_vars(*init_vals) + + if ob_stat is not None: + ob_stat.set_from_init(init_mean, init_std, init_count=1e5) + + + def _get_pos(self, model): + mass = model.body_mass + xpos = model.data.xipos + center = (np.sum(mass * xpos, 0) / np.sum(mass)) + return center[0], center[1], center[2] + + + def rollout(self, env, *, render=False, timestep_limit=None, save_obs=False, random_stream=None): + """ + If random_stream is provided, the rollout will take noisy actions with noise drawn from that stream. + Otherwise, no action noise will be added. 
class ESAtariPolicy(Policy):
    """Convolutional Atari policy whose batch-norm layers are driven by a reference batch.

    ``set_ref_batch`` must be called before ``rollout``: every rollout first
    passes the stored reference batch through the network with the
    ``is_training`` flag set, then acts on single observations with the flag
    cleared.
    """

    def _initialize(self, ob_space, ac_space):
        self.ob_space_shape = ob_space.shape
        self.ac_space = ac_space
        self.num_actions = ac_space.n

        with tf.variable_scope(type(self).__name__) as scope:
            o = tf.placeholder(tf.float32, [None] + list(self.ob_space_shape))
            # Scalar flag fed to batch_norm's is_training argument.
            is_ref_ph = tf.placeholder(tf.bool, shape=[])

            a = self._make_net(o, is_ref_ph)
            self._act = U.function([o, is_ref_ph] , a)
        return scope

    def _make_net(self, o, is_ref):
        """Two conv layers and one dense layer, each followed by batch norm + ReLU; returns the argmax action."""
        x = o
        x = layers.convolution2d(x, num_outputs=16, kernel_size=8, stride=4, activation_fn=None, scope='conv1')
        x = layers.batch_norm(x, scale=True, is_training=is_ref, decay=0., updates_collections=None, activation_fn=tf.nn.relu, epsilon=1e-3)
        x = layers.convolution2d(x, num_outputs=32, kernel_size=4, stride=2, activation_fn=None, scope='conv2')
        x = layers.batch_norm(x, scale=True, is_training=is_ref, decay=0., updates_collections=None, activation_fn=tf.nn.relu, epsilon=1e-3)

        x = layers.flatten(x)
        x = layers.fully_connected(x, num_outputs=256, activation_fn=None, scope='fc')
        x = layers.batch_norm(x, scale=True, is_training=is_ref, decay=0., updates_collections=None, activation_fn=tf.nn.relu, epsilon=1e-3)
        a = layers.fully_connected(x, num_outputs=self.num_actions, activation_fn=None, scope='out')
        return tf.argmax(a,1)

    def set_ref_batch(self, ref_batch):
        """Store the reference batch as the ``[observations, is_ref=True]`` argument list for ``act``."""
        self.ref_list = [ref_batch, True]

    @property
    def needs_ob_stat(self):
        return False

    @property
    def needs_ref_batch(self):
        return True

    def initialize_from(self, filename, ob_stat=None):
        """
        Initializes weights from another policy, which must have the same architecture (variable names),
        but the weight arrays can be smaller than the current policy.

        ``ob_stat`` is accepted for signature parity with callers that pass
        observation statistics to every policy type; it is ignored here
        because this policy reports ``needs_ob_stat`` as False.
        """
        with h5py.File(filename, 'r') as f:
            f_var_names = []
            f.visititems(lambda name, obj: f_var_names.append(name) if isinstance(obj, h5py.Dataset) else None)
            assert set(v.name for v in self.all_variables) == set(f_var_names), 'Variable names do not match'

            init_vals = []
            for v in self.all_variables:
                shp = v.get_shape().as_list()
                f_shp = f[v.name].shape
                assert len(shp) == len(f_shp) and all(a >= b for a, b in zip(shp, f_shp)), \
                    'This policy must have more weights than the policy to load'
                init_val = v.eval()
                # ob_mean and ob_std are initialized with nan, so set them manually
                if 'ob_mean' in v.name:
                    init_val[:] = 0
                elif 'ob_std' in v.name:
                    init_val[:] = 0.001
                # Fill in subarray from the loaded policy
                init_val[tuple([np.s_[:s] for s in f_shp])] = f[v.name]
                init_vals.append(init_val)
            self.set_all_vars(*init_vals)

    def act(self, train_vars, random_stream=None):
        """Run the network on ``train_vars``, a ``[observations, is_ref]`` pair; ``random_stream`` is unused."""
        return self._act(*train_vars)

    def rollout(self, env, *, render=False, timestep_limit=None, save_obs=False, random_stream=None, worker_stats=None, policy_seed=None):
        """
        Run one episode, recording the emulator RAM after every step as the novelty vector.

        If random_stream is provided, the rollout will take noisy actions with noise drawn from that stream.
        Otherwise, no action noise will be added.

        The episode stops after the smaller of ``timestep_limit`` and the
        environment's TimeLimit tag; either may be absent.

        Returns ``(rews, t, novelty_vector)`` or, with ``save_obs=True``,
        ``(rews, t, obs, novelty_vector)``.
        """
        env_timestep_limit = env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps')
        # Either limit can be None; min() over None raises TypeError on Python 3.
        if timestep_limit is None:
            timestep_limit = env_timestep_limit
        elif env_timestep_limit is not None:
            timestep_limit = min(timestep_limit, env_timestep_limit)

        rews = []
        novelty_vector = []
        t = 0

        if save_obs:
            obs = []

        if policy_seed:
            env.seed(policy_seed)
            np.random.seed(policy_seed)
            if random_stream:
                random_stream.seed(policy_seed)

        ob = env.reset()
        self.act(self.ref_list, random_stream=random_stream) #passing ref batch through network

        while timestep_limit is None or t < timestep_limit:
            start_time = time.time()
            ac = self.act([ob[None], False], random_stream=random_stream)[0]

            if worker_stats:
                worker_stats.time_comp_act += time.time() - start_time

            start_time = time.time()
            ob, rew, done, _ = env.step(ac)
            ram = env.unwrapped._get_ram() # extracts RAM state information

            if save_obs:
                obs.append(ob)
            if worker_stats:
                worker_stats.time_comp_step += time.time() - start_time

            rews.append(rew)
            novelty_vector.append(ram)

            t += 1
            if render:
                env.render()
            if done:
                break

        rews = np.array(rews, dtype=np.float32)
        if save_obs:
            return rews, t, np.array(obs), np.array(novelty_vector)
        return rews, t, np.array(novelty_vector)
std=1.0)) + + x = U.flattenallbut0(x) + x = self.nonlin(U.dense(x, 256, 'fc', U.normc_initializer(1.0), std=1.0)) + + a = U.dense(x, self.num_actions, 'out', U.normc_initializer(self.ac_init_std), std=self.ac_init_std) + + return tf.argmax(a,1) + + @property + def needs_ob_stat(self): + return False + + @property + def needs_ref_batch(self): + return False + + # Dont add random noise since action space is discrete + def act(self, train_vars, random_stream=None): + return self._act(train_vars) + + def rollout(self, env, *, render=False, timestep_limit=None, save_obs=False, random_stream=None, worker_stats=None, policy_seed=None): + """ + If random_stream is provided, the rollout will take noisy actions with noise drawn from that stream. + Otherwise, no action noise will be added. + """ + env_timestep_limit = env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps') + timestep_limit = env_timestep_limit if timestep_limit is None else min(timestep_limit, env_timestep_limit) + rews = [] + rollout_details = {} + t = 0 + + if save_obs: + obs = [] + + ob = env.reset() + for _ in range(timestep_limit): + ac = self.act(ob[None], random_stream=random_stream)[0] + + if save_obs: + obs.append(ob) + ob, rew, done, info = env.step(ac) + rews.append(rew) + + t += 1 + if render: + env.render() + if done: + break + + # Copy over final positions to the max timesteps + rews = np.array(rews, dtype=np.float32) + if save_obs: + return rews, t, np.array(obs) + return rews, t + diff --git a/es_distributed/rs.py b/es_distributed/rs.py new file mode 100644 index 00000000..421a351b --- /dev/null +++ b/es_distributed/rs.py @@ -0,0 +1,229 @@ +import logging +import time +from collections import namedtuple + +import numpy as np + +from .dist import MasterClient, WorkerClient +from .ga import * + + +def run_master(master_redis_cfg, log_dir, exp): + logger.info('run_master: {}'.format(locals())) + from .optimizers import SGD, Adam + from . 
import tabular_logger as tlogger + logger.info('Tabular logging to {}'.format(log_dir)) + tlogger.start(log_dir) + config, env, sess, policy = setup(exp, single_threaded=False) + master = MasterClient(master_redis_cfg) + noise = SharedNoiseTable() + rs = np.random.RandomState() + + if isinstance(config.episode_cutoff_mode, int): + tslimit, incr_tslimit_threshold, tslimit_incr_ratio = config.episode_cutoff_mode, None, None + adaptive_tslimit = False + elif config.episode_cutoff_mode.startswith('adaptive:'): + _, args = config.episode_cutoff_mode.split(':') + arg0, arg1, arg2 = args.split(',') + tslimit, incr_tslimit_threshold, tslimit_incr_ratio = int(arg0), float(arg1), float(arg2) + adaptive_tslimit = True + logger.info( + 'Starting timestep limit set to {}. When {}% of rollouts hit the limit, it will be increased by {}'.format( + tslimit, incr_tslimit_threshold * 100, tslimit_incr_ratio)) + elif config.episode_cutoff_mode == 'env_default': + tslimit, incr_tslimit_threshold, tslimit_incr_ratio = None, None, None + adaptive_tslimit = False + else: + raise NotImplementedError(config.episode_cutoff_mode) + + episodes_so_far = 0 + timesteps_so_far = 0 + tstart = time.time() + master.declare_experiment(exp) + best_score = float('-inf') + + while True: + step_tstart = time.time() + theta = policy.get_trainable_flat() + assert theta.dtype == np.float32 + + curr_task_id = master.declare_task(Task( + params=theta, + ob_mean=ob_stat.mean if policy.needs_ob_stat else None, + ob_std=ob_stat.std if policy.needs_ob_stat else None, + timestep_limit=tslimit + )) + tlogger.log('********** Iteration {} **********'.format(curr_task_id)) + + # Pop off results for the current task + curr_task_results, eval_rets, eval_lens, worker_ids = [], [], [], [] + num_results_skipped, num_episodes_popped, num_timesteps_popped, ob_count_this_batch = 0, 0, 0, 0 + while num_episodes_popped < config.episodes_per_batch or num_timesteps_popped < config.timesteps_per_batch: + # Wait for a result + 
task_id, result = master.pop_result() + assert isinstance(task_id, int) and isinstance(result, Result) + assert (result.eval_return is None) == (result.eval_length is None) + worker_ids.append(result.worker_id) + + if result.eval_length is not None: + # This was an eval job + episodes_so_far += 1 + timesteps_so_far += result.eval_length + # Store the result only for current tasks + if task_id == curr_task_id: + eval_rets.append(result.eval_return) + eval_lens.append(result.eval_length) + else: + assert (result.noise_inds_n.ndim == 1 and + result.returns_n2.shape == result.lengths_n2.shape == (len(result.noise_inds_n), 1)) + assert result.returns_n2.dtype == np.float32 + # Store results only for current tasks + if task_id == curr_task_id: + # Update counts + result_num_eps = result.lengths_n2.size + result_num_timesteps = result.lengths_n2.sum() + episodes_so_far += result_num_eps + timesteps_so_far += result_num_timesteps + + curr_task_results.append(result) + num_episodes_popped += result_num_eps + num_timesteps_popped += result_num_timesteps + # Update ob stats + if policy.needs_ob_stat and result.ob_count > 0: + ob_stat.increment(result.ob_sum, result.ob_sumsq, result.ob_count) + ob_count_this_batch += result.ob_count + else: + num_results_skipped += 1 + + # Compute skip fraction + frac_results_skipped = num_results_skipped / (num_results_skipped + len(curr_task_results)) + if num_results_skipped > 0: + logger.warning('Skipped {} out of date results ({:.2f}%)'.format( + num_results_skipped, 100. 
* frac_results_skipped)) + + # Assemble results + noise_inds_n = np.concatenate([r.noise_inds_n for r in curr_task_results]) + returns_n2 = np.concatenate([r.returns_n2 for r in curr_task_results]) + lengths_n2 = np.concatenate([r.lengths_n2 for r in curr_task_results]) + assert noise_inds_n.shape[0] == returns_n2.shape[0] == lengths_n2.shape[0] + # Process returns + idx = np.argmax(returns_n2) + if returns_n2[idx] > best_score: + policy.set_trainable_flat(noise.get(noise_inds_n[idx], policy.num_params)) + policy.reinitialize() + best_score = returns_n2[idx] + # Update number of steps to take + if adaptive_tslimit and (lengths_n2 == tslimit).mean() >= incr_tslimit_threshold: + old_tslimit = tslimit + tslimit = int(tslimit_incr_ratio * tslimit) + logger.info('Increased timestep limit from {} to {}'.format(old_tslimit, tslimit)) + + step_tend = time.time() + tlogger.record_tabular("EpRewMax", returns_n2.max()) + tlogger.record_tabular("EpRewMean", returns_n2.mean()) + tlogger.record_tabular("EpRewStd", returns_n2.std()) + tlogger.record_tabular("EpLenMean", lengths_n2.mean()) + + tlogger.record_tabular("EvalEpRewMean", np.nan if not eval_rets else np.mean(eval_rets)) + tlogger.record_tabular("EvalEpRewMedian", np.nan if not eval_rets else np.median(eval_rets)) + tlogger.record_tabular("EvalEpRewStd", np.nan if not eval_rets else np.std(eval_rets)) + tlogger.record_tabular("EvalEpLenMean", np.nan if not eval_rets else np.mean(eval_lens)) + tlogger.record_tabular("EvalPopRank", np.nan if not eval_rets else ( + np.searchsorted(np.sort(returns_n2.ravel()), eval_rets).mean() / returns_n2.size)) + tlogger.record_tabular("EvalEpCount", len(eval_rets)) + + tlogger.record_tabular("Norm", float(np.square(policy.get_trainable_flat()).sum())) + + tlogger.record_tabular("EpisodesThisIter", lengths_n2.size) + tlogger.record_tabular("EpisodesSoFar", episodes_so_far) + tlogger.record_tabular("TimestepsThisIter", lengths_n2.sum()) + tlogger.record_tabular("TimestepsSoFar", 
def run_worker(master_redis_cfg, relay_redis_cfg, noise, *, min_task_runtime=.2):
    """
    Worker loop: repeatedly fetch the current task and push rollout results.

    Each iteration either runs one evaluation episode with the task's
    parameters (with probability `config.eval_prob`) or keeps sampling
    parameter vectors from `noise`, rolling them out until at least
    `min_task_runtime` seconds have passed since the task was fetched.
    This function never returns.
    """
    logger.info('run_worker: {}'.format(locals()))
    assert isinstance(noise, SharedNoiseTable)
    client = WorkerClient(master_redis_cfg, relay_redis_cfg)
    experiment = client.get_experiment()
    config, env, sess, policy = setup(experiment, single_threaded=True)
    rng = np.random.RandomState()
    worker_id = rng.randint(2 ** 31)

    assert policy.needs_ob_stat == (config.calc_obstat_prob != 0)

    while True:
        task_id, task = client.get_current_task()
        started_at = time.time()
        assert isinstance(task_id, int) and isinstance(task, Task)
        if policy.needs_ob_stat:
            policy.set_ob_stat(task.ob_mean, task.ob_std)

        if rng.rand() < config.eval_prob:
            # Evaluation: the task's own parameters, no sampled perturbation.
            policy.set_trainable_flat(task.params)
            # Eval rollouts don't obey task.timestep_limit.
            eval_rews, eval_length = policy.rollout(env)
            eval_return = eval_rews.sum()
            logger.info('Eval result: task={} return={:.3f} length={}'.format(task_id, eval_return, eval_length))
            eval_result = Result(
                worker_id=worker_id,
                noise_inds_n=None,
                returns_n2=None,
                signreturns_n2=None,
                lengths_n2=None,
                eval_return=eval_return,
                eval_length=eval_length,
                ob_sum=None,
                ob_sumsq=None,
                ob_count=None
            )
            client.push_result(task_id, eval_result)
            continue

        # Rollouts with parameters drawn from the shared noise table.
        sampled_inds, ep_returns, ep_signreturns, ep_lengths = [], [], [], []
        # eps=0 because this accumulator is only ever incremented.
        task_ob_stat = RunningStat(env.observation_space.shape, eps=0.)

        # Always do one rollout, then continue while the time budget remains.
        while True:
            idx = noise.sample_index(rng, policy.num_params)
            params = noise.get(idx, policy.num_params)

            policy.set_trainable_flat(params)
            policy.reinitialize()
            rews_pos, len_pos = rollout_and_update_ob_stat(
                policy, env, task.timestep_limit, rng, task_ob_stat, config.calc_obstat_prob)

            sampled_inds.append(idx)
            ep_returns.append([rews_pos.sum()])
            ep_signreturns.append([np.sign(rews_pos).sum()])
            ep_lengths.append([len_pos])

            if not (time.time() - started_at < min_task_runtime):
                break

        no_obs = task_ob_stat.count == 0
        rollout_result = Result(
            worker_id=worker_id,
            noise_inds_n=np.array(sampled_inds),
            returns_n2=np.array(ep_returns, dtype=np.float32),
            signreturns_n2=np.array(ep_signreturns, dtype=np.float32),
            lengths_n2=np.array(ep_lengths, dtype=np.int32),
            eval_return=None,
            eval_length=None,
            ob_sum=None if no_obs else task_ob_stat.sum,
            ob_sumsq=None if no_obs else task_ob_stat.sumsq,
            ob_count=task_ob_stat.count
        )
        client.push_result(task_id, rollout_result)
def stop():
    """
    Close the current logger and fall back to the default one.

    If no logger was started, a warning is written to stderr and nothing
    else happens.
    """
    if _Logger.CURRENT is _Logger.DEFAULT:
        # The message has no placeholders; %-formatting it with arguments
        # raised TypeError instead of printing the warning.
        sys.stderr.write("WARNING: You asked to stop logging, but you never started any previous logger.\n")
        return
    _Logger.CURRENT.close()
    _Logger.CURRENT = _Logger.DEFAULT
+ """ + _Logger.CURRENT.dump_tabular() + +def log(*args, level=INFO): + """ + Write the sequence of args, with no separators, to the console and output files (if you've configured an output file). + """ + _Logger.CURRENT.log(*args, level=level) + +def debug(*args): + log(*args, level=DEBUG) +def info(*args): + log(*args, level=INFO) +def warn(*args): + log(*args, level=WARN) +def error(*args): + log(*args, level=ERROR) + +def set_level(level): + """ + Set logging threshold on current logger. + """ + _Logger.CURRENT.set_level(level) + +def get_dir(): + """ + Get directory that log files are being written to. + will be None if there is no output directory (i.e., if you didn't call start) + """ + return _Logger.CURRENT.get_dir() + +def get_expt_dir(): + sys.stderr.write("get_expt_dir() is Deprecated. Switch to get_dir()\n") + return get_dir() + +# ================================================================ +# Backend +# ================================================================ + +class _Logger(object): + DEFAULT = None # A logger with no output files. 
(See right below class definition) + # So that you can still log to the terminal without setting up any output files + CURRENT = None # Current logger being used by the free functions above + + def __init__(self, dir=None): + self.name2val = OrderedDict() # values this iteration + self.level = INFO + self.dir = dir + self.text_outputs = [sys.stdout] + if dir is not None: + os.makedirs(dir, exist_ok=True) + self.text_outputs.append(open(os.path.join(dir, "log.txt"), "w")) + self.tbwriter = TbWriter(dir=dir, prefix="events") + else: + self.tbwriter = None + + # Logging API, forwarded + # ---------------------------------------- + def record_tabular(self, key, val): + self.name2val[key] = val + def dump_tabular(self): + # Create strings for printing + key2str = OrderedDict() + for (key,val) in self.name2val.items(): + if hasattr(val, "__float__"): valstr = "%-8.3g"%val + else: valstr = val + key2str[self._truncate(key)]=self._truncate(valstr) + keywidth = max(map(len, key2str.keys())) + valwidth = max(map(len, key2str.values())) + # Write to all text outputs + self._write_text("-"*(keywidth+valwidth+7), "\n") + for (key,val) in key2str.items(): + self._write_text("| ", key, " "*(keywidth-len(key)), " | ", val, " "*(valwidth-len(val)), " |\n") + self._write_text("-"*(keywidth+valwidth+7), "\n") + for f in self.text_outputs: + try: f.flush() + except OSError: sys.stderr.write('Warning! 
OSError when flushing.\n') + # Write to tensorboard + if self.tbwriter is not None: + self.tbwriter.write_values(self.name2val) + self.name2val.clear() + def log(self, *args, level=INFO): + if self.level <= level: + self._do_log(*args) + + # Configuration + # ---------------------------------------- + def set_level(self, level): + self.level = level + def get_dir(self): + return self.dir + + def close(self): + for f in self.text_outputs[1:]: f.close() + if self.tbwriter: self.tbwriter.close() + + # Misc + # ---------------------------------------- + def _do_log(self, *args): + self._write_text(*args, '\n') + for f in self.text_outputs: + try: f.flush() + except OSError: print('Warning! OSError when flushing.') + def _write_text(self, *strings): + for f in self.text_outputs: + for string in strings: + f.write(string) + def _truncate(self, s): + if len(s) > 33: + return s[:30] + "..." + else: + return s + +_Logger.DEFAULT = _Logger() +_Logger.CURRENT = _Logger.DEFAULT + + + + +def _demo(): + info("hi") + debug("shouldn't appear") + set_level(DEBUG) + debug("should appear") + dir = "/tmp/testlogging" + if os.path.exists(dir): + shutil.rmtree(dir) + start(dir=dir) + record_tabular("a", 3) + record_tabular("b", 2.5) + dump_tabular() + record_tabular("b", -2.5) + record_tabular("a", 5.5) + dump_tabular() + info("^^^ should see a = 5.5") + stop() + + try: + record_tabular("newthing", 5.5) + except AssertionError: + pass + + record_tabular("b", -2.5) + dump_tabular() + + + record_tabular("a", "asdfasdfasdf") + dump_tabular() + +if __name__ == "__main__": + _demo() diff --git a/es_distributed/tf_util.py b/es_distributed/tf_util.py new file mode 100644 index 00000000..3336e240 --- /dev/null +++ b/es_distributed/tf_util.py @@ -0,0 +1,292 @@ +import numpy as np +import tensorflow as tf # pylint: ignore-module +import builtins +import functools +import copy +import os + +# ================================================================ +# Import all names into common namespace 
def var(x, axis=None, keepdims=False):
    """
    Variance of `x` over `axis` (all elements when `axis` is None).

    `keepdims` controls only the shape of the returned tensor.
    """
    # The mean used for centering must keep the reduced axis so that
    # `x - meanx` broadcasts along that axis. With the axis dropped, a
    # reduction over a non-leading axis would line up against the wrong
    # dimension.
    meanx = mean(x, axis=axis, keepdims=True)
    return mean(tf.square(x - meanx), axis=axis, keepdims=keepdims)
def save_state(fname):
    """
    Save the default session's variables to `fname` with `tf.train.Saver`.

    The parent directory of `fname` is created if it does not exist.
    """
    dirname = os.path.dirname(fname)
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    saver = tf.train.Saver()
    saver.save(get_session(), fname)
def dense(x, size, name, weight_init=None, bias=True, std=1.0):
    """
    Fully connected layer: `x @ w`, plus a bias vector when `bias` is true.

    The variables are named `name + "/w"` and `name + "/b"`. Each variable
    gets a `reinitialize` attribute holding an op: the weights are passed
    through `_normalize` with `std`, and the bias is assigned zeros.
    """
    in_dim = x.get_shape()[1]
    weights = tf.get_variable(name + "/w", [in_dim, size], initializer=weight_init)
    weights.reinitialize = _normalize(weights, std=std)

    product = tf.matmul(x, weights)
    if not bias:
        return product

    offset = tf.get_variable(name + "/b", [size], initializer=tf.zeros_initializer)
    offset.reinitialize = offset.assign(tf.zeros_like(offset))
    return product + offset
class _Function(object):
    """
    Callable that runs a fixed list of outputs and update ops in the default session.

    Calling it feeds the positional values to `inputs` in order, adds
    `givens` to the feed dict, runs the outputs together with the grouped
    updates, and returns the output values as a list.
    """

    def __init__(self, inputs, outputs, updates, givens, check_nan=False):
        assert all(len(ph.op.inputs) == 0 for ph in inputs), "inputs should all be placeholders"
        self.inputs = inputs
        update_ops = updates if updates else []
        self.update_group = tf.group(*update_ops)
        # The update group goes last so that its result can be sliced off.
        self.outputs_update = list(outputs) + [self.update_group]
        self.givens = givens if givens is not None else {}
        self.check_nan = check_nan

    def __call__(self, *inputvals):
        assert len(inputvals) == len(self.inputs)
        feed = {ph: val for ph, val in zip(self.inputs, inputvals)}
        feed.update(self.givens)
        fetched = get_session().run(self.outputs_update, feed_dict=feed)
        results = fetched[:-1]  # drop the update group's result
        if self.check_nan and any(np.isnan(r).any() for r in results):
            raise RuntimeError("Nan detected")
        return results
def in_session(f):
    """
    Decorator that runs `f` inside a fresh `tf.Session` context.

    The wrapper returns whatever `f` returns.
    """
    @functools.wraps(f)
    def newfunc(*args, **kwargs):
        with tf.Session():
            # Propagate the result; it was previously discarded.
            return f(*args, **kwargs)
    return newfunc
+1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/redis_config/redis_local_mirror.conf b/redis_config/redis_local_mirror.conf new file mode 100644 index 00000000..b7c83058 --- /dev/null +++ b/redis_config/redis_local_mirror.conf @@ -0,0 +1,1052 @@ +# Redis configuration file example. +# +# Note that in order to read the configuration file, Redis must be +# started with the file path as first argument: +# +# ./redis-server /path/to/redis.conf + +# Note on units: when memory size is needed, it is possible to specify +# it in the usual form of 1k 5GB 4M and so forth: +# +# 1k => 1000 bytes +# 1kb => 1024 bytes +# 1m => 1000000 bytes +# 1mb => 1024*1024 bytes +# 1g => 1000000000 bytes +# 1gb => 1024*1024*1024 bytes +# +# units are case insensitive so 1GB 1Gb 1gB are all the same. + +################################## INCLUDES ################################### + +# Include one or more other config files here. This is useful if you +# have a standard template that goes to all Redis servers but also need +# to customize a few per-server settings. Include files can include +# other files, so use this wisely. +# +# Notice option "include" won't be rewritten by command "CONFIG REWRITE" +# from admin or Redis Sentinel. Since Redis always uses the last processed +# line as value of a configuration directive, you'd better put includes +# at the beginning of this file to avoid overwriting config change at runtime. +# +# If instead you are interested in using includes to override configuration +# options, it is better to use include as the last line. 
+# +# include /path/to/local.conf +# include /path/to/other.conf + +################################## NETWORK ##################################### + +# By default, if no "bind" configuration directive is specified, Redis listens +# for connections from all the network interfaces available on the server. +# It is possible to listen to just one or multiple selected interfaces using +# the "bind" configuration directive, followed by one or more IP addresses. +# +# Examples: +# +# bind 192.168.1.100 10.0.0.1 +# bind 127.0.0.1 ::1 +# +# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the +# internet, binding to all the interfaces is dangerous and will expose the +# instance to everybody on the internet. So by default we uncomment the +# following bind directive, that will force Redis to listen only on +# the IPv4 loopback interface address (this means Redis will be able to +# accept connections only from clients running on the same computer it +# is running on). +# +# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES +# JUST COMMENT THE FOLLOWING LINE. +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +bind 127.0.0.1 + +# Protected mode is a layer of security protection, in order to avoid that +# Redis instances left open on the internet are accessed and exploited. +# +# When protected mode is on and if: +# +# 1) The server is not binding explicitly to a set of addresses using the +# "bind" directive. +# 2) No password is configured. +# +# The server only accepts connections from clients connecting from the +# IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain +# sockets. +# +# By default protected mode is enabled. You should disable it only if +# you are sure you want clients from other hosts to connect to Redis +# even if no authentication is configured, nor a specific set of interfaces +# are explicitly listed using the "bind" directive. 
+protected-mode yes + +# Accept connections on the specified port, default is 6379 (IANA #815344). +# If port 0 is specified Redis will not listen on a TCP socket. +port 0 + +# TCP listen() backlog. +# +# In high requests-per-second environments you need a high backlog in order +# to avoid slow client connection issues. Note that the Linux kernel +# will silently truncate it to the value of /proc/sys/net/core/somaxconn so +# make sure to raise both the value of somaxconn and tcp_max_syn_backlog +# in order to get the desired effect. +tcp-backlog 511 + +# Unix socket. +# +# Specify the path for the Unix socket that will be used to listen for +# incoming connections. There is no default, so Redis will not listen +# on a unix socket when not specified. +# +unixsocket /tmp/es_redis_relay.sock +unixsocketperm 700 + +# Close the connection after a client is idle for N seconds (0 to disable) +timeout 0 + +# TCP keepalive. +# +# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence +# of communication. This is useful for two reasons: +# +# 1) Detect dead peers. +# 2) Keep the connection alive from the point of view of network +# equipment in the middle. +# +# On Linux, the specified value (in seconds) is the period used to send ACKs. +# Note that to close the connection the double of the time is needed. +# On other kernels the period depends on the kernel configuration. +# +# A reasonable value for this option is 300 seconds, which is the new +# Redis default starting with Redis 3.2.1. +tcp-keepalive 300 + +################################# GENERAL ##################################### + +# By default Redis does not run as a daemon. Use 'yes' if you need it. +# Note that Redis will write a pid file in /var/run/redis.pid when daemonized. +daemonize no + +# If you run Redis from upstart or systemd, Redis can interact with your +# supervision tree. 
Options: +# supervised no - no supervision interaction +# supervised upstart - signal upstart by putting Redis into SIGSTOP mode +# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET +# supervised auto - detect upstart or systemd method based on +# UPSTART_JOB or NOTIFY_SOCKET environment variables +# Note: these supervision methods only signal "process is ready." +# They do not enable continuous liveness pings back to your supervisor. +supervised no + +# If a pid file is specified, Redis writes it where specified at startup +# and removes it at exit. +# +# When the server runs non daemonized, no pid file is created if none is +# specified in the configuration. When the server is daemonized, the pid file +# is used even if not specified, defaulting to "/var/run/redis.pid". +# +# Creating a pid file is best effort: if Redis is not able to create it +# nothing bad happens, the server will start and run normally. +pidfile /var/run/redis_6379.pid + +# Specify the server verbosity level. +# This can be one of: +# debug (a lot of information, useful for development/testing) +# verbose (many rarely useful info, but not a mess like the debug level) +# notice (moderately verbose, what you want in production probably) +# warning (only very important / critical messages are logged) +loglevel notice + +# Specify the log file name. Also the empty string can be used to force +# Redis to log on the standard output. Note that if you use standard +# output for logging but daemonize, logs will be sent to /dev/null +logfile "" + +# To enable logging to the system logger, just set 'syslog-enabled' to yes, +# and optionally update the other syslog parameters to suit your needs. +# syslog-enabled no + +# Specify the syslog identity. +# syslog-ident redis + +# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. +# syslog-facility local0 + +# Set the number of databases. 
The default database is DB 0, you can select +# a different one on a per-connection basis using SELECT <dbid> where +# dbid is a number between 0 and 'databases'-1 +databases 16 + +################################ SNAPSHOTTING ################################ +# +# Save the DB on disk: +# +# save <seconds> <changes> +# +# Will save the DB if both the given number of seconds and the given +# number of write operations against the DB occurred. +# +# In the example below the behaviour will be to save: +# after 900 sec (15 min) if at least 1 key changed +# after 300 sec (5 min) if at least 10 keys changed +# after 60 sec if at least 10000 keys changed +# +# Note: you can disable saving completely by commenting out all "save" lines. +# +# It is also possible to remove all the previously configured save +# points by adding a save directive with a single empty string argument +# like in the following example: +# +# save "" + +# save 900 1 +# save 300 10 +# save 60 10000 + +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. +# This will make the user aware (in a hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# disaster will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. +# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usual even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + +# Compress string objects using LZF when dump .rdb databases? +# For default that's set to 'yes' as it's almost always a win. +# If you want to save some CPU in the saving child set it to 'no' but +# the dataset will likely be bigger if you have compressible values or keys.
+rdbcompression yes + +# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. +# This makes the format more resistant to corruption but there is a performance +# hit to pay (around 10%) when saving and loading RDB files, so you can disable it +# for maximum performances. +# +# RDB files created with checksum disabled have a checksum of zero that will +# tell the loading code to skip the check. +rdbchecksum yes + +# The filename where to dump the DB +dbfilename dump.rdb + +# The working directory. +# +# The DB will be written inside this directory, with the filename specified +# above using the 'dbfilename' configuration directive. +# +# The Append Only File will also be created inside this directory. +# +# Note that you must specify a directory here, not a file name. +dir ./ + +################################# REPLICATION ################################# + +# Master-Slave replication. Use slaveof to make a Redis instance a copy of +# another Redis server. A few things to understand ASAP about Redis replication. +# +# 1) Redis replication is asynchronous, but you can configure a master to +# stop accepting writes if it appears to be not connected with at least +# a given number of slaves. +# 2) Redis slaves are able to perform a partial resynchronization with the +# master if the replication link is lost for a relatively small amount of +# time. You may want to configure the replication backlog size (see the next +# sections of this file) with a sensible value depending on your needs. +# 3) Replication is automatic and does not need user intervention. After a +# network partition slaves automatically try to reconnect to masters +# and resynchronize with them. +# +# slaveof + +# If the master is password protected (using the "requirepass" configuration +# directive below) it is possible to tell the slave to authenticate before +# starting the replication synchronization process, otherwise the master will +# refuse the slave request. 
+# +# masterauth + +# When a slave loses its connection with the master, or when the replication +# is still in progress, the slave can act in two different ways: +# +# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will +# still reply to client requests, possibly with out of date data, or the +# data set may just be empty if this is the first synchronization. +# +# 2) if slave-serve-stale-data is set to 'no' the slave will reply with +# an error "SYNC with master in progress" to all the kind of commands +# but to INFO and SLAVEOF. +# +slave-serve-stale-data yes + +# You can configure a slave instance to accept writes or not. Writing against +# a slave instance may be useful to store some ephemeral data (because data +# written on a slave will be easily deleted after resync with the master) but +# may also cause problems if clients are writing to it because of a +# misconfiguration. +# +# Since Redis 2.6 by default slaves are read-only. +# +# Note: read only slaves are not designed to be exposed to untrusted clients +# on the internet. It's just a protection layer against misuse of the instance. +# Still a read only slave exports by default all the administrative commands +# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve +# security of read only slaves using 'rename-command' to shadow all the +# administrative / dangerous commands. +slave-read-only yes + +# Replication SYNC strategy: disk or socket. +# +# ------------------------------------------------------- +# WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY +# ------------------------------------------------------- +# +# New slaves and reconnecting slaves that are not able to continue the replication +# process just receiving differences, need to do what is called a "full +# synchronization". An RDB file is transmitted from the master to the slaves. 
+# The transmission can happen in two different ways: +# +# 1) Disk-backed: The Redis master creates a new process that writes the RDB +# file on disk. Later the file is transferred by the parent +# process to the slaves incrementally. +# 2) Diskless: The Redis master creates a new process that directly writes the +# RDB file to slave sockets, without touching the disk at all. +# +# With disk-backed replication, while the RDB file is generated, more slaves +# can be queued and served with the RDB file as soon as the current child producing +# the RDB file finishes its work. With diskless replication instead once +# the transfer starts, new slaves arriving will be queued and a new transfer +# will start when the current one terminates. +# +# When diskless replication is used, the master waits a configurable amount of +# time (in seconds) before starting the transfer in the hope that multiple slaves +# will arrive and the transfer can be parallelized. +# +# With slow disks and fast (large bandwidth) networks, diskless replication +# works better. +repl-diskless-sync no + +# When diskless replication is enabled, it is possible to configure the delay +# the server waits in order to spawn the child that transfers the RDB via socket +# to the slaves. +# +# This is important since once the transfer starts, it is not possible to serve +# new slaves arriving, that will be queued for the next RDB transfer, so the server +# waits a delay in order to let more slaves arrive. +# +# The delay is specified in seconds, and by default is 5 seconds. To disable +# it entirely just set it to 0 seconds and the transfer will start ASAP. +repl-diskless-sync-delay 5 + +# Slaves send PINGs to server in a predefined interval. It's possible to change +# this interval with the repl_ping_slave_period option. The default value is 10 +# seconds. 
+# +# repl-ping-slave-period 10 + +# The following option sets the replication timeout for: +# +# 1) Bulk transfer I/O during SYNC, from the point of view of slave. +# 2) Master timeout from the point of view of slaves (data, pings). +# 3) Slave timeout from the point of view of masters (REPLCONF ACK pings). +# +# It is important to make sure that this value is greater than the value +# specified for repl-ping-slave-period otherwise a timeout will be detected +# every time there is low traffic between the master and the slave. +# +# repl-timeout 60 + +# Disable TCP_NODELAY on the slave socket after SYNC? +# +# If you select "yes" Redis will use a smaller number of TCP packets and +# less bandwidth to send data to slaves. But this can add a delay for +# the data to appear on the slave side, up to 40 milliseconds with +# Linux kernels using a default configuration. +# +# If you select "no" the delay for data to appear on the slave side will +# be reduced but more bandwidth will be used for replication. +# +# By default we optimize for low latency, but in very high traffic conditions +# or when the master and slaves are many hops away, turning this to "yes" may +# be a good idea. +repl-disable-tcp-nodelay no + +# Set the replication backlog size. The backlog is a buffer that accumulates +# slave data when slaves are disconnected for some time, so that when a slave +# wants to reconnect again, often a full resync is not needed, but a partial +# resync is enough, just passing the portion of data the slave missed while +# disconnected. +# +# The bigger the replication backlog, the longer the time the slave can be +# disconnected and later be able to perform a partial resynchronization. +# +# The backlog is only allocated once there is at least a slave connected. +# +# repl-backlog-size 1mb + +# After a master has no longer connected slaves for some time, the backlog +# will be freed. 
The following option configures the amount of seconds that +# need to elapse, starting from the time the last slave disconnected, for +# the backlog buffer to be freed. +# +# A value of 0 means to never release the backlog. +# +# repl-backlog-ttl 3600 + +# The slave priority is an integer number published by Redis in the INFO output. +# It is used by Redis Sentinel in order to select a slave to promote into a +# master if the master is no longer working correctly. +# +# A slave with a low priority number is considered better for promotion, so +# for instance if there are three slaves with priority 10, 100, 25 Sentinel will +# pick the one with priority 10, that is the lowest. +# +# However a special priority of 0 marks the slave as not able to perform the +# role of master, so a slave with priority of 0 will never be selected by +# Redis Sentinel for promotion. +# +# By default the priority is 100. +slave-priority 100 + +# It is possible for a master to stop accepting writes if there are less than +# N slaves connected, having a lag less or equal than M seconds. +# +# The N slaves need to be in "online" state. +# +# The lag in seconds, that must be <= the specified value, is calculated from +# the last ping received from the slave, that is usually sent every second. +# +# This option does not GUARANTEE that N replicas will accept the write, but +# will limit the window of exposure for lost writes in case not enough slaves +# are available, to the specified number of seconds. +# +# For example to require at least 3 slaves with a lag <= 10 seconds use: +# +# min-slaves-to-write 3 +# min-slaves-max-lag 10 +# +# Setting one or the other to 0 disables the feature. +# +# By default min-slaves-to-write is set to 0 (feature disabled) and +# min-slaves-max-lag is set to 10. + +# A Redis master is able to list the address and port of the attached +# slaves in different ways. 
For example the "INFO replication" section +# offers this information, which is used, among other tools, by +# Redis Sentinel in order to discover slave instances. +# Another place where this info is available is in the output of the +# "ROLE" command of a master. +# +# The listed IP address and port normally reported by a slave is obtained +# in the following way: +# +# IP: The address is auto detected by checking the peer address +# of the socket used by the slave to connect with the master. +# +# Port: The port is communicated by the slave during the replication +# handshake, and is normally the port that the slave is using to +# listen for connections. +# +# However when port forwarding or Network Address Translation (NAT) is +# used, the slave may be actually reachable via different IP and port +# pairs. The following two options can be used by a slave in order to +# report to its master a specific set of IP and port, so that both INFO +# and ROLE will report those values. +# +# There is no need to use both the options if you need to override just +# the port or the IP address. +# +# slave-announce-ip 5.5.5.5 +# slave-announce-port 1234 + +################################## SECURITY ################################### + +# Require clients to issue AUTH before processing any other +# commands. This might be useful in environments in which you do not trust +# others with access to the host running redis-server. +# +# This should stay commented out for backward compatibility and because most +# people do not need auth (e.g. they run their own servers). +# +# Warning: since Redis is pretty fast an outside user can try up to +# 150k passwords per second against a good box. This means that you should +# use a very strong password otherwise it will be very easy to break. +# +# requirepass foobared + +# Command renaming. +# +# It is possible to change the name of dangerous commands in a shared +# environment.
For instance the CONFIG command may be renamed into something +# hard to guess so that it will still be available for internal-use tools +# but not available for general clients. +# +# Example: +# +# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 +# +# It is also possible to completely kill a command by renaming it into +# an empty string: +# +# rename-command CONFIG "" +# +# Please note that changing the name of commands that are logged into the +# AOF file or transmitted to slaves may cause problems. + +################################### LIMITS #################################### + +# Set the max number of connected clients at the same time. By default +# this limit is set to 10000 clients, however if the Redis server is not +# able to configure the process file limit to allow for the specified limit +# the max number of allowed clients is set to the current file limit +# minus 32 (as Redis reserves a few file descriptors for internal uses). +# +# Once the limit is reached Redis will close all the new connections sending +# an error 'max number of clients reached'. +# +# maxclients 10000 + +# Don't use more memory than the specified amount of bytes. +# When the memory limit is reached Redis will try to remove keys +# according to the eviction policy selected (see maxmemory-policy). +# +# If Redis can't remove keys according to the policy, or if the policy is +# set to 'noeviction', Redis will start to reply with errors to commands +# that would use more memory, like SET, LPUSH, and so on, and will continue +# to reply to read-only commands like GET. +# +# This option is usually useful when using Redis as an LRU cache, or to set +# a hard memory limit for an instance (using the 'noeviction' policy). 
+# +# WARNING: If you have slaves attached to an instance with maxmemory on, +# the size of the output buffers needed to feed the slaves are subtracted +# from the used memory count, so that network problems / resyncs will +# not trigger a loop where keys are evicted, and in turn the output +# buffer of slaves is full with DELs of keys evicted triggering the deletion +# of more keys, and so forth until the database is completely emptied. +# +# In short... if you have slaves attached it is suggested that you set a lower +# limit for maxmemory so that there is some free RAM on the system for slave +# output buffers (but this is not needed if the policy is 'noeviction'). +# +# maxmemory + +# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory +# is reached. You can select among five behaviors: +# +# volatile-lru -> remove the key with an expire set using an LRU algorithm +# allkeys-lru -> remove any key according to the LRU algorithm +# volatile-random -> remove a random key with an expire set +# allkeys-random -> remove a random key, any key +# volatile-ttl -> remove the key with the nearest expire time (minor TTL) +# noeviction -> don't expire at all, just return an error on write operations +# +# Note: with any of the above policies, Redis will return an error on write +# operations, when there are no suitable keys for eviction. +# +# At the date of writing these commands are: set setnx setex append +# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd +# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby +# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby +# getset mset msetnx exec sort +# +# The default is: +# +# maxmemory-policy noeviction + +# LRU and minimal TTL algorithms are not precise algorithms but approximated +# algorithms (in order to save memory), so you can tune it for speed or +# accuracy. 
For default Redis will check five keys and pick the one that was +# used less recently, you can change the sample size using the following +# configuration directive. +# +# The default of 5 produces good enough results. 10 Approximates very closely +# true LRU but costs a bit more CPU. 3 is very fast but not very accurate. +# +# maxmemory-samples 5 + +############################## APPEND ONLY MODE ############################### + +# By default Redis asynchronously dumps the dataset on disk. This mode is +# good enough in many applications, but an issue with the Redis process or +# a power outage may result into a few minutes of writes lost (depending on +# the configured save points). +# +# The Append Only File is an alternative persistence mode that provides +# much better durability. For instance using the default data fsync policy +# (see later in the config file) Redis can lose just one second of writes in a +# dramatic event like a server power outage, or a single write if something +# wrong with the Redis process itself happens, but the operating system is +# still running correctly. +# +# AOF and RDB persistence can be enabled at the same time without problems. +# If the AOF is enabled on startup Redis will load the AOF, that is the file +# with the better durability guarantees. +# +# Please check http://redis.io/topics/persistence for more information. + +appendonly no + +# The name of the append only file (default: "appendonly.aof") + +appendfilename "appendonly.aof" + +# The fsync() call tells the Operating System to actually write data on disk +# instead of waiting for more data in the output buffer. Some OS will really flush +# data on disk, some other OS will just try to do it ASAP. +# +# Redis supports three different modes: +# +# no: don't fsync, just let the OS flush the data when it wants. Faster. +# always: fsync after every write to the append only log. Slow, Safest. +# everysec: fsync only one time every second. Compromise. 
+# +# The default is "everysec", as that's usually the right compromise between +# speed and data safety. It's up to you to understand if you can relax this to +# "no" that will let the operating system flush the output buffer when +# it wants, for better performances (but if you can live with the idea of +# some data loss consider the default persistence mode that's snapshotting), +# or on the contrary, use "always" that's very slow but a bit safer than +# everysec. +# +# More details please check the following article: +# http://antirez.com/post/redis-persistence-demystified.html +# +# If unsure, use "everysec". + +# appendfsync always +appendfsync everysec +# appendfsync no + +# When the AOF fsync policy is set to always or everysec, and a background +# saving process (a background save or AOF log background rewriting) is +# performing a lot of I/O against the disk, in some Linux configurations +# Redis may block too long on the fsync() call. Note that there is no fix for +# this currently, as even performing fsync in a different thread will block +# our synchronous write(2) call. +# +# In order to mitigate this problem it's possible to use the following option +# that will prevent fsync() from being called in the main process while a +# BGSAVE or BGREWRITEAOF is in progress. +# +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync none". In practical terms, this means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the +# default Linux settings). +# +# If you have latency problems turn this to "yes". Otherwise leave it as +# "no" that is the safest pick from the point of view of durability. + +no-appendfsync-on-rewrite no + +# Automatic rewrite of the append only file. +# Redis is able to automatically rewrite the log file implicitly calling +# BGREWRITEAOF when the AOF log size grows by the specified percentage. 
+# +# This is how it works: Redis remembers the size of the AOF file after the +# latest rewrite (if no rewrite has happened since the restart, the size of +# the AOF at startup is used). +# +# This base size is compared to the current size. If the current size is +# bigger than the specified percentage, the rewrite is triggered. Also +# you need to specify a minimal size for the AOF file to be rewritten, this +# is useful to avoid rewriting the AOF file even if the percentage increase +# is reached but it is still pretty small. +# +# Specify a percentage of zero in order to disable the automatic AOF +# rewrite feature. + +auto-aof-rewrite-percentage 100 +auto-aof-rewrite-min-size 64mb + +# An AOF file may be found to be truncated at the end during the Redis +# startup process, when the AOF data gets loaded back into memory. +# This may happen when the system where Redis is running +# crashes, especially when an ext4 filesystem is mounted without the +# data=ordered option (however this can't happen when Redis itself +# crashes or aborts but the operating system still works correctly). +# +# Redis can either exit with an error when this happens, or load as much +# data as possible (the default now) and start if the AOF file is found +# to be truncated at the end. The following option controls this behavior. +# +# If aof-load-truncated is set to yes, a truncated AOF file is loaded and +# the Redis server starts emitting a log to inform the user of the event. +# Otherwise if the option is set to no, the server aborts with an error +# and refuses to start. When the option is set to no, the user requires +# to fix the AOF file using the "redis-check-aof" utility before to restart +# the server. +# +# Note that if the AOF file will be found to be corrupted in the middle +# the server will still exit with an error. This option only applies when +# Redis will try to read more data from the AOF file but not enough bytes +# will be found. 
+aof-load-truncated yes + +################################ LUA SCRIPTING ############################### + +# Max execution time of a Lua script in milliseconds. +# +# If the maximum execution time is reached Redis will log that a script is +# still in execution after the maximum allowed time and will start to +# reply to queries with an error. +# +# When a long running script exceeds the maximum execution time only the +# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be +# used to stop a script that has not yet called write commands. The second +# is the only way to shut down the server in the case a write command was +# already issued by the script but the user doesn't want to wait for the natural +# termination of the script. +# +# Set it to 0 or a negative value for unlimited execution without warnings. +lua-time-limit 5000 + +################################ REDIS CLUSTER ############################### +# +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however +# in order to mark it as "mature" we need to wait for a non trivial percentage +# of users to deploy it in production. +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# +# Normal Redis instances can't be part of a Redis Cluster; only nodes that are +# started as cluster nodes can. In order to start a Redis instance as a +# cluster node enable the cluster support uncommenting the following: +# +# cluster-enabled yes + +# Every cluster node has a cluster configuration file. This file is not +# intended to be edited by hand. It is created and updated by Redis nodes. +# Every Redis Cluster node requires a different cluster configuration file. +# Make sure that instances running in the same system do not have +# overlapping cluster configuration file names.
+# +# cluster-config-file nodes-6379.conf + +# Cluster node timeout is the amount of milliseconds a node must be unreachable +# for it to be considered in failure state. +# Most other internal time limits are multiple of the node timeout. +# +# cluster-node-timeout 15000 + +# A slave of a failing master will avoid to start a failover if its data +# looks too old. +# +# There is no simple way for a slave to actually have a exact measure of +# its "data age", so the following two checks are performed: +# +# 1) If there are multiple slaves able to failover, they exchange messages +# in order to try to give an advantage to the slave with the best +# replication offset (more data from the master processed). +# Slaves will try to get their rank by offset, and apply to the start +# of the failover a delay proportional to their rank. +# +# 2) Every single slave computes the time of the last interaction with +# its master. This can be the last ping or command received (if the master +# is still in the "connected" state), or the time that elapsed since the +# disconnection with the master (if the replication link is currently down). +# If the last interaction is too old, the slave will not try to failover +# at all. +# +# The point "2" can be tuned by user. Specifically a slave will not perform +# the failover if, since the last interaction with the master, the time +# elapsed is greater than: +# +# (node-timeout * slave-validity-factor) + repl-ping-slave-period +# +# So for example if node-timeout is 30 seconds, and the slave-validity-factor +# is 10, and assuming a default repl-ping-slave-period of 10 seconds, the +# slave will not try to failover if it was not able to talk with the master +# for longer than 310 seconds. +# +# A large slave-validity-factor may allow slaves with too old data to failover +# a master, while a too small value may prevent the cluster from being able to +# elect a slave at all. 
+# +# For maximum availability, it is possible to set the slave-validity-factor +# to a value of 0, which means, that slaves will always try to failover the +# master regardless of the last time they interacted with the master. +# (However they'll always try to apply a delay proportional to their +# offset rank). +# +# Zero is the only value able to guarantee that when all the partitions heal +# the cluster will always be able to continue. +# +# cluster-slave-validity-factor 10 + +# Cluster slaves are able to migrate to orphaned masters, that are masters +# that are left without working slaves. This improves the cluster ability +# to resist to failures as otherwise an orphaned master can't be failed over +# in case of failure if it has no working slaves. +# +# Slaves migrate to orphaned masters only if there are still at least a +# given number of other working slaves for their old master. This number +# is the "migration barrier". A migration barrier of 1 means that a slave +# will migrate only if there is at least 1 other working slave for its master +# and so forth. It usually reflects the number of slaves you want for every +# master in your cluster. +# +# Default is 1 (slaves migrate only if their masters remain with at least +# one slave). To disable migration just set it to a very large value. +# A value of 0 can be set but is useful only for debugging and dangerous +# in production. +# +# cluster-migration-barrier 1 + +# By default Redis Cluster nodes stop accepting queries if they detect there +# is at least an hash slot uncovered (no available node is serving it). +# This way if the cluster is partially down (for example a range of hash slots +# are no longer covered) all the cluster becomes, eventually, unavailable. +# It automatically returns available as soon as all the slots are covered again. 
+# +# However sometimes you want the subset of the cluster which is working, +# to continue to accept queries for the part of the key space that is still +# covered. In order to do so, just set the cluster-require-full-coverage +# option to no. +# +# cluster-require-full-coverage yes + +# In order to setup your cluster make sure to read the documentation +# available at http://redis.io web site. + +################################## SLOW LOG ################################### + +# The Redis Slow Log is a system to log queries that exceeded a specified +# execution time. The execution time does not include the I/O operations +# like talking with the client, sending the reply and so forth, +# but just the time needed to actually execute the command (this is the only +# stage of command execution where the thread is blocked and can not serve +# other requests in the meantime). +# +# You can configure the slow log with two parameters: one tells Redis +# what is the execution time, in microseconds, to exceed in order for the +# command to get logged, and the other parameter is the length of the +# slow log. When a new command is logged the oldest one is removed from the +# queue of logged commands. + +# The following time is expressed in microseconds, so 1000000 is equivalent +# to one second. Note that a negative number disables the slow log, while +# a value of zero forces the logging of every command. +slowlog-log-slower-than 10000 + +# There is no limit to this length. Just be aware that it will consume memory. +# You can reclaim memory used by the slow log with SLOWLOG RESET. +slowlog-max-len 128 + +################################ LATENCY MONITOR ############################## + +# The Redis latency monitoring subsystem samples different operations +# at runtime in order to collect data related to possible sources of +# latency of a Redis instance. +# +# Via the LATENCY command this information is available to the user that can +# print graphs and obtain reports. 
+# +# The system only logs operations that were performed in a time equal or +# greater than the amount of milliseconds specified via the +# latency-monitor-threshold configuration directive. When its value is set +# to zero, the latency monitor is turned off. +# +# By default latency monitoring is disabled since it is mostly not needed +# if you don't have latency issues, and collecting data has a performance +# impact, that while very small, can be measured under big load. Latency +# monitoring can easily be enabled at runtime using the command +# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed. +latency-monitor-threshold 0 + +############################# EVENT NOTIFICATION ############################## + +# Redis can notify Pub/Sub clients about events happening in the key space. +# This feature is documented at http://redis.io/topics/notifications +# +# For instance if keyspace events notification is enabled, and a client +# performs a DEL operation on key "foo" stored in the Database 0, two +# messages will be published via Pub/Sub: +# +# PUBLISH __keyspace@0__:foo del +# PUBLISH __keyevent@0__:del foo +# +# It is possible to select the events that Redis will notify among a set +# of classes. Every class is identified by a single character: +# +# K Keyspace events, published with __keyspace@<db>__ prefix. +# E Keyevent events, published with __keyevent@<db>__ prefix. +# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... +# $ String commands +# l List commands +# s Set commands +# h Hash commands +# z Sorted set commands +# x Expired events (events generated every time a key expires) +# e Evicted events (events generated when a key is evicted for maxmemory) +# A Alias for g$lshzxe, so that the "AKE" string means all the events. +# +# The "notify-keyspace-events" takes as argument a string that is composed +# of zero or multiple characters. The empty string means that notifications +# are disabled.
+# +# Example: to enable list and generic events, from the point of view of the +# event name, use: +# +# notify-keyspace-events Elg +# +# Example 2: to get the stream of the expired keys subscribing to channel +# name __keyevent@0__:expired use: +# +# notify-keyspace-events Ex +# +# By default all notifications are disabled because most users don't need +# this feature and the feature has some overhead. Note that if you don't +# specify at least one of K or E, no events will be delivered. +notify-keyspace-events "" + +############################### ADVANCED CONFIG ############################### + +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 + +# Lists are also encoded in a special way to save a lot of space. +# The number of entries allowed per internal list node can be specified +# as a fixed maximum size or a maximum number of elements. +# For a fixed maximum size, use -5 through -1, meaning: +# -5: max size: 64 Kb <-- not recommended for normal workloads +# -4: max size: 32 Kb <-- not recommended +# -3: max size: 16 Kb <-- probably not recommended +# -2: max size: 8 Kb <-- good +# -1: max size: 4 Kb <-- good +# Positive numbers mean store up to _exactly_ that number of elements +# per list node. +# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), +# but if your use case is unique, adjust the settings as necessary. +list-max-ziplist-size -2 + +# Lists may also be compressed. +# Compress depth is the number of quicklist ziplist nodes from *each* side of +# the list to *exclude* from compression. The head and tail of the list +# are always uncompressed for fast push/pop operations. 
Settings are: +# 0: disable all list compression +# 1: depth 1 means "don't start compressing until after 1 node into the list, +# going from either the head or tail" +# So: [head]->node->node->...->node->[tail] +# [head], [tail] will always be uncompressed; inner nodes will compress. +# 2: [head]->[next]->node->node->...->node->[prev]->[tail] +# 2 here means: don't compress head or head->next or tail->prev or tail, +# but compress all nodes between them. +# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] +# etc. +list-compress-depth 0 + +# Sets have a special encoding in just one case: when a set is composed +# of just strings that happen to be integers in radix 10 in the range +# of 64 bit signed integers. +# The following configuration setting sets the limit in the size of the +# set in order to use this special memory saving encoding. +set-max-intset-entries 512 + +# Similarly to hashes and lists, sorted sets are also specially encoded in +# order to save a lot of space. This encoding is only used when the length and +# elements of a sorted set are below the following limits: +zset-max-ziplist-entries 128 +zset-max-ziplist-value 64 + +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is converted into the dense representation. +# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. +# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. The value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. 
+hll-sparse-max-bytes 3000 + +# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in +# order to help rehashing the main Redis hash table (the one mapping top-level +# keys to values). The hash table implementation Redis uses (see dict.c) +# performs a lazy rehashing: the more operations you run into a hash table +# that is rehashing, the more rehashing "steps" are performed, so if the +# server is idle the rehashing is never complete and some more memory is used +# by the hash table. +# +# The default is to use this millisecond 10 times every second in order to +# actively rehash the main dictionaries, freeing memory when possible. +# +# If unsure: +# use "activerehashing no" if you have hard latency requirements and it is +# not a good thing in your environment that Redis can reply from time to time +# to queries with 2 milliseconds delay. +# +# use "activerehashing yes" if you don't have such hard requirements but +# want to free memory asap when possible. +activerehashing yes + +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub client can't consume messages as fast as the +# publisher can produce them). +# +# The limit can be set differently for the three different classes of clients: +# +# normal -> normal clients including MONITOR clients +# slave -> slave clients +# pubsub -> clients subscribed to at least one pubsub channel or pattern +# +# The syntax of every client-output-buffer-limit directive is the following: +# +# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds> +# +# A client is immediately disconnected once the hard limit is reached, or if +# the soft limit is reached and remains reached for the specified number of +# seconds (continuously).
+# So for instance if the hard limit is 32 megabytes and the soft limit is +# 16 megabytes / 10 seconds, the client will get disconnected immediately +# if the size of the output buffers reach 32 megabytes, but will also get +# disconnected if the client reaches 16 megabytes and continuously overcomes +# the limit for 10 seconds. +# +# By default normal clients are not limited because they don't receive data +# without asking (in a push way), but just after a request, so only +# asynchronous clients may create a scenario where data is requested faster +# than it can read. +# +# Instead there is a default limit for pubsub and slave clients, since +# subscribers and slaves receive data in a push fashion. +# +# Both the hard or the soft limit can be disabled by setting them to zero. +client-output-buffer-limit normal 0 0 0 +client-output-buffer-limit slave 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + +# Redis calls an internal function to perform many background tasks, like +# closing connections of clients in timeout, purging expired keys that are +# never requested, and so forth. +# +# Not all tasks are performed with the same frequency, but Redis checks for +# tasks to perform according to the specified "hz" value. +# +# By default "hz" is set to 10. Raising the value will use more CPU when +# Redis is idle, but at the same time will make Redis more responsive when +# there are many keys expiring at the same time, and timeouts may be +# handled with more precision. +# +# The range is between 1 and 500, however a value over 100 is usually not +# a good idea. Most users should use the default of 10 and raise this up to +# 100 only in environments where very low latency is required. +hz 10 + +# When a child rewrites the AOF file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. 
+aof-rewrite-incremental-fsync yes diff --git a/redis_config/redis_master.conf b/redis_config/redis_master.conf new file mode 100644 index 00000000..9cd2e2cf --- /dev/null +++ b/redis_config/redis_master.conf @@ -0,0 +1,1052 @@ +# Redis configuration file example. +# +# Note that in order to read the configuration file, Redis must be +# started with the file path as first argument: +# +# ./redis-server /path/to/redis.conf + +# Note on units: when memory size is needed, it is possible to specify +# it in the usual form of 1k 5GB 4M and so forth: +# +# 1k => 1000 bytes +# 1kb => 1024 bytes +# 1m => 1000000 bytes +# 1mb => 1024*1024 bytes +# 1g => 1000000000 bytes +# 1gb => 1024*1024*1024 bytes +# +# units are case insensitive so 1GB 1Gb 1gB are all the same. + +################################## INCLUDES ################################### + +# Include one or more other config files here. This is useful if you +# have a standard template that goes to all Redis servers but also need +# to customize a few per-server settings. Include files can include +# other files, so use this wisely. +# +# Notice option "include" won't be rewritten by command "CONFIG REWRITE" +# from admin or Redis Sentinel. Since Redis always uses the last processed +# line as value of a configuration directive, you'd better put includes +# at the beginning of this file to avoid overwriting config change at runtime. +# +# If instead you are interested in using includes to override configuration +# options, it is better to use include as the last line. +# +# include /path/to/local.conf +# include /path/to/other.conf + +################################## NETWORK ##################################### + +# By default, if no "bind" configuration directive is specified, Redis listens +# for connections from all the network interfaces available on the server. 
+# It is possible to listen to just one or multiple selected interfaces using +# the "bind" configuration directive, followed by one or more IP addresses. +# +# Examples: +# +# bind 192.168.1.100 10.0.0.1 +# bind 127.0.0.1 ::1 +# +# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the +# internet, binding to all the interfaces is dangerous and will expose the +# instance to everybody on the internet. So by default we uncomment the +# following bind directive, that will force Redis to listen only into +# the IPv4 loopback interface address (this means Redis will be able to +# accept connections only from clients running into the same computer it +# is running). +# +# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES +# JUST COMMENT THE FOLLOWING LINE. +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +bind 127.0.0.1 + +# Protected mode is a layer of security protection, in order to avoid that +# Redis instances left open on the internet are accessed and exploited. +# +# When protected mode is on and if: +# +# 1) The server is not binding explicitly to a set of addresses using the +# "bind" directive. +# 2) No password is configured. +# +# The server only accepts connections from clients connecting from the +# IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain +# sockets. +# +# By default protected mode is enabled. You should disable it only if +# you are sure you want clients from other hosts to connect to Redis +# even if no authentication is configured, nor a specific set of interfaces +# are explicitly listed using the "bind" directive. +protected-mode yes + +# Accept connections on the specified port, default is 6379 (IANA #815344). +# If port 0 is specified Redis will not listen on a TCP socket. +port 6379 + +# TCP listen() backlog. +# +# In high requests-per-second environments you need a high backlog in order +# to avoid slow client connection issues.
Note that the Linux kernel +# will silently truncate it to the value of /proc/sys/net/core/somaxconn so +# make sure to raise both the value of somaxconn and tcp_max_syn_backlog +# in order to get the desired effect. +tcp-backlog 511 + +# Unix socket. +# +# Specify the path for the Unix socket that will be used to listen for +# incoming connections. There is no default, so Redis will not listen +# on a unix socket when not specified. +# +unixsocket /tmp/es_redis_master.sock +unixsocketperm 700 + +# Close the connection after a client is idle for N seconds (0 to disable) +timeout 0 + +# TCP keepalive. +# +# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence +# of communication. This is useful for two reasons: +# +# 1) Detect dead peers. +# 2) Keep the connection alive from the point of view of network +# equipment in the middle. +# +# On Linux, the specified value (in seconds) is the period used to send ACKs. +# Note that to close the connection the double of the time is needed. +# On other kernels the period depends on the kernel configuration. +# +# A reasonable value for this option is 300 seconds, which is the new +# Redis default starting with Redis 3.2.1. +tcp-keepalive 300 + +################################# GENERAL ##################################### + +# By default Redis does not run as a daemon. Use 'yes' if you need it. +# Note that Redis will write a pid file in /var/run/redis.pid when daemonized. +daemonize no + +# If you run Redis from upstart or systemd, Redis can interact with your +# supervision tree. Options: +# supervised no - no supervision interaction +# supervised upstart - signal upstart by putting Redis into SIGSTOP mode +# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET +# supervised auto - detect upstart or systemd method based on +# UPSTART_JOB or NOTIFY_SOCKET environment variables +# Note: these supervision methods only signal "process is ready."
+# They do not enable continuous liveness pings back to your supervisor. +supervised no + +# If a pid file is specified, Redis writes it where specified at startup +# and removes it at exit. +# +# When the server runs non daemonized, no pid file is created if none is +# specified in the configuration. When the server is daemonized, the pid file +# is used even if not specified, defaulting to "/var/run/redis.pid". +# +# Creating a pid file is best effort: if Redis is not able to create it +# nothing bad happens, the server will start and run normally. +pidfile /var/run/redis_6379.pid + +# Specify the server verbosity level. +# This can be one of: +# debug (a lot of information, useful for development/testing) +# verbose (many rarely useful info, but not a mess like the debug level) +# notice (moderately verbose, what you want in production probably) +# warning (only very important / critical messages are logged) +loglevel notice + +# Specify the log file name. Also the empty string can be used to force +# Redis to log on the standard output. Note that if you use standard +# output for logging but daemonize, logs will be sent to /dev/null +logfile "" + +# To enable logging to the system logger, just set 'syslog-enabled' to yes, +# and optionally update the other syslog parameters to suit your needs. +# syslog-enabled no + +# Specify the syslog identity. +# syslog-ident redis + +# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. +# syslog-facility local0 + +# Set the number of databases. The default database is DB 0, you can select +# a different one on a per-connection basis using SELECT <dbid> where +# dbid is a number between 0 and 'databases'-1 +databases 16 + +################################ SNAPSHOTTING ################################ +# +# Save the DB on disk: +# +# save <seconds> <changes> +# +# Will save the DB if both the given number of seconds and the given +# number of write operations against the DB occurred.
+# +# In the example below the behaviour will be to save: +# after 900 sec (15 min) if at least 1 key changed +# after 300 sec (5 min) if at least 10 keys changed +# after 60 sec if at least 10000 keys changed +# +# Note: you can disable saving completely by commenting out all "save" lines. +# +# It is also possible to remove all the previously configured save +# points by adding a save directive with a single empty string argument +# like in the following example: +# +# save "" + +# save 900 1 +# save 300 10 +# save 60 10000 + +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. +# This will make the user aware (in a hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# disaster will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. +# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usual even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + +# Compress string objects using LZF when dump .rdb databases? +# For default that's set to 'yes' as it's almost always a win. +# If you want to save some CPU in the saving child set it to 'no' but +# the dataset will likely be bigger if you have compressible values or keys. +rdbcompression yes + +# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. +# This makes the format more resistant to corruption but there is a performance +# hit to pay (around 10%) when saving and loading RDB files, so you can disable it +# for maximum performances. +# +# RDB files created with checksum disabled have a checksum of zero that will +# tell the loading code to skip the check. 
+rdbchecksum yes + +# The filename where to dump the DB +dbfilename dump.rdb + +# The working directory. +# +# The DB will be written inside this directory, with the filename specified +# above using the 'dbfilename' configuration directive. +# +# The Append Only File will also be created inside this directory. +# +# Note that you must specify a directory here, not a file name. +dir ./ + +################################# REPLICATION ################################# + +# Master-Slave replication. Use slaveof to make a Redis instance a copy of +# another Redis server. A few things to understand ASAP about Redis replication. +# +# 1) Redis replication is asynchronous, but you can configure a master to +# stop accepting writes if it appears to be not connected with at least +# a given number of slaves. +# 2) Redis slaves are able to perform a partial resynchronization with the +# master if the replication link is lost for a relatively small amount of +# time. You may want to configure the replication backlog size (see the next +# sections of this file) with a sensible value depending on your needs. +# 3) Replication is automatic and does not need user intervention. After a +# network partition slaves automatically try to reconnect to masters +# and resynchronize with them. +# +# slaveof <masterip> <masterport> + +# If the master is password protected (using the "requirepass" configuration +# directive below) it is possible to tell the slave to authenticate before +# starting the replication synchronization process, otherwise the master will +# refuse the slave request. +# +# masterauth <master-password> + +# When a slave loses its connection with the master, or when the replication +# is still in progress, the slave can act in two different ways: +# +# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will +# still reply to client requests, possibly with out of date data, or the +# data set may just be empty if this is the first synchronization.
+# +# 2) if slave-serve-stale-data is set to 'no' the slave will reply with +# an error "SYNC with master in progress" to all the kind of commands +# but to INFO and SLAVEOF. +# +slave-serve-stale-data yes + +# You can configure a slave instance to accept writes or not. Writing against +# a slave instance may be useful to store some ephemeral data (because data +# written on a slave will be easily deleted after resync with the master) but +# may also cause problems if clients are writing to it because of a +# misconfiguration. +# +# Since Redis 2.6 by default slaves are read-only. +# +# Note: read only slaves are not designed to be exposed to untrusted clients +# on the internet. It's just a protection layer against misuse of the instance. +# Still a read only slave exports by default all the administrative commands +# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve +# security of read only slaves using 'rename-command' to shadow all the +# administrative / dangerous commands. +slave-read-only yes + +# Replication SYNC strategy: disk or socket. +# +# ------------------------------------------------------- +# WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY +# ------------------------------------------------------- +# +# New slaves and reconnecting slaves that are not able to continue the replication +# process just receiving differences, need to do what is called a "full +# synchronization". An RDB file is transmitted from the master to the slaves. +# The transmission can happen in two different ways: +# +# 1) Disk-backed: The Redis master creates a new process that writes the RDB +# file on disk. Later the file is transferred by the parent +# process to the slaves incrementally. +# 2) Diskless: The Redis master creates a new process that directly writes the +# RDB file to slave sockets, without touching the disk at all. 
+# +# With disk-backed replication, while the RDB file is generated, more slaves +# can be queued and served with the RDB file as soon as the current child producing +# the RDB file finishes its work. With diskless replication instead once +# the transfer starts, new slaves arriving will be queued and a new transfer +# will start when the current one terminates. +# +# When diskless replication is used, the master waits a configurable amount of +# time (in seconds) before starting the transfer in the hope that multiple slaves +# will arrive and the transfer can be parallelized. +# +# With slow disks and fast (large bandwidth) networks, diskless replication +# works better. +repl-diskless-sync no + +# When diskless replication is enabled, it is possible to configure the delay +# the server waits in order to spawn the child that transfers the RDB via socket +# to the slaves. +# +# This is important since once the transfer starts, it is not possible to serve +# new slaves arriving, that will be queued for the next RDB transfer, so the server +# waits a delay in order to let more slaves arrive. +# +# The delay is specified in seconds, and by default is 5 seconds. To disable +# it entirely just set it to 0 seconds and the transfer will start ASAP. +repl-diskless-sync-delay 5 + +# Slaves send PINGs to server in a predefined interval. It's possible to change +# this interval with the repl_ping_slave_period option. The default value is 10 +# seconds. +# +# repl-ping-slave-period 10 + +# The following option sets the replication timeout for: +# +# 1) Bulk transfer I/O during SYNC, from the point of view of slave. +# 2) Master timeout from the point of view of slaves (data, pings). +# 3) Slave timeout from the point of view of masters (REPLCONF ACK pings). +# +# It is important to make sure that this value is greater than the value +# specified for repl-ping-slave-period otherwise a timeout will be detected +# every time there is low traffic between the master and the slave. 
+# +# repl-timeout 60 + +# Disable TCP_NODELAY on the slave socket after SYNC? +# +# If you select "yes" Redis will use a smaller number of TCP packets and +# less bandwidth to send data to slaves. But this can add a delay for +# the data to appear on the slave side, up to 40 milliseconds with +# Linux kernels using a default configuration. +# +# If you select "no" the delay for data to appear on the slave side will +# be reduced but more bandwidth will be used for replication. +# +# By default we optimize for low latency, but in very high traffic conditions +# or when the master and slaves are many hops away, turning this to "yes" may +# be a good idea. +repl-disable-tcp-nodelay no + +# Set the replication backlog size. The backlog is a buffer that accumulates +# slave data when slaves are disconnected for some time, so that when a slave +# wants to reconnect again, often a full resync is not needed, but a partial +# resync is enough, just passing the portion of data the slave missed while +# disconnected. +# +# The bigger the replication backlog, the longer the time the slave can be +# disconnected and later be able to perform a partial resynchronization. +# +# The backlog is only allocated once there is at least a slave connected. +# +# repl-backlog-size 1mb + +# After a master has no longer connected slaves for some time, the backlog +# will be freed. The following option configures the amount of seconds that +# need to elapse, starting from the time the last slave disconnected, for +# the backlog buffer to be freed. +# +# A value of 0 means to never release the backlog. +# +# repl-backlog-ttl 3600 + +# The slave priority is an integer number published by Redis in the INFO output. +# It is used by Redis Sentinel in order to select a slave to promote into a +# master if the master is no longer working correctly. 
+# +# A slave with a low priority number is considered better for promotion, so +# for instance if there are three slaves with priority 10, 100, 25 Sentinel will +# pick the one with priority 10, that is the lowest. +# +# However a special priority of 0 marks the slave as not able to perform the +# role of master, so a slave with priority of 0 will never be selected by +# Redis Sentinel for promotion. +# +# By default the priority is 100. +slave-priority 100 + +# It is possible for a master to stop accepting writes if there are less than +# N slaves connected, having a lag less or equal than M seconds. +# +# The N slaves need to be in "online" state. +# +# The lag in seconds, that must be <= the specified value, is calculated from +# the last ping received from the slave, that is usually sent every second. +# +# This option does not GUARANTEE that N replicas will accept the write, but +# will limit the window of exposure for lost writes in case not enough slaves +# are available, to the specified number of seconds. +# +# For example to require at least 3 slaves with a lag <= 10 seconds use: +# +# min-slaves-to-write 3 +# min-slaves-max-lag 10 +# +# Setting one or the other to 0 disables the feature. +# +# By default min-slaves-to-write is set to 0 (feature disabled) and +# min-slaves-max-lag is set to 10. + +# A Redis master is able to list the address and port of the attached +# slaves in different ways. For example the "INFO replication" section +# offers this information, which is used, among other tools, by +# Redis Sentinel in order to discover slave instances. +# Another place where this info is available is in the output of the +# "ROLE" command of a master. +# +# The listed IP address and port normally reported by a slave are obtained +# in the following way: +# +# IP: The address is auto detected by checking the peer address +# of the socket used by the slave to connect with the master.
+# +# Port: The port is communicated by the slave during the replication +# handshake, and is normally the port that the slave is using to +# listen for connections. +# +# However when port forwarding or Network Address Translation (NAT) is +# used, the slave may be actually reachable via different IP and port +# pairs. The following two options can be used by a slave in order to +# report to its master a specific set of IP and port, so that both INFO +# and ROLE will report those values. +# +# There is no need to use both the options if you need to override just +# the port or the IP address. +# +# slave-announce-ip 5.5.5.5 +# slave-announce-port 1234 + +################################## SECURITY ################################### + +# Require clients to issue AUTH before processing any other +# commands. This might be useful in environments in which you do not trust +# others with access to the host running redis-server. +# +# This should stay commented out for backward compatibility and because most +# people do not need auth (e.g. they run their own servers). +# +# Warning: since Redis is pretty fast an outside user can try up to +# 150k passwords per second against a good box. This means that you should +# use a very strong password otherwise it will be very easy to break. +# +# requirepass foobared + +# Command renaming. +# +# It is possible to change the name of dangerous commands in a shared +# environment. For instance the CONFIG command may be renamed into something +# hard to guess so that it will still be available for internal-use tools +# but not available for general clients. +# +# Example: +# +# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 +# +# It is also possible to completely kill a command by renaming it into +# an empty string: +# +# rename-command CONFIG "" +# +# Please note that changing the name of commands that are logged into the +# AOF file or transmitted to slaves may cause problems.
+ +################################### LIMITS #################################### + +# Set the max number of connected clients at the same time. By default +# this limit is set to 10000 clients, however if the Redis server is not +# able to configure the process file limit to allow for the specified limit +# the max number of allowed clients is set to the current file limit +# minus 32 (as Redis reserves a few file descriptors for internal uses). +# +# Once the limit is reached Redis will close all the new connections sending +# an error 'max number of clients reached'. +# +# maxclients 10000 + +# Don't use more memory than the specified amount of bytes. +# When the memory limit is reached Redis will try to remove keys +# according to the eviction policy selected (see maxmemory-policy). +# +# If Redis can't remove keys according to the policy, or if the policy is +# set to 'noeviction', Redis will start to reply with errors to commands +# that would use more memory, like SET, LPUSH, and so on, and will continue +# to reply to read-only commands like GET. +# +# This option is usually useful when using Redis as an LRU cache, or to set +# a hard memory limit for an instance (using the 'noeviction' policy). +# +# WARNING: If you have slaves attached to an instance with maxmemory on, +# the size of the output buffers needed to feed the slaves are subtracted +# from the used memory count, so that network problems / resyncs will +# not trigger a loop where keys are evicted, and in turn the output +# buffer of slaves is full with DELs of keys evicted triggering the deletion +# of more keys, and so forth until the database is completely emptied. +# +# In short... if you have slaves attached it is suggested that you set a lower +# limit for maxmemory so that there is some free RAM on the system for slave +# output buffers (but this is not needed if the policy is 'noeviction'). 
+# +# maxmemory <bytes> + +# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory +# is reached. You can select among the following behaviors: +# +# volatile-lru -> remove the key with an expire set using an LRU algorithm +# allkeys-lru -> remove any key according to the LRU algorithm +# volatile-random -> remove a random key with an expire set +# allkeys-random -> remove a random key, any key +# volatile-ttl -> remove the key with the nearest expire time (minor TTL) +# noeviction -> don't expire at all, just return an error on write operations +# +# Note: with any of the above policies, Redis will return an error on write +# operations, when there are no suitable keys for eviction. +# +# At the date of writing these commands are: set setnx setex append +# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd +# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby +# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby +# getset mset msetnx exec sort +# +# The default is: +# +# maxmemory-policy noeviction + +# LRU and minimal TTL algorithms are not precise algorithms but approximated +# algorithms (in order to save memory), so you can tune it for speed or +# accuracy. For default Redis will check five keys and pick the one that was +# used less recently, you can change the sample size using the following +# configuration directive. +# +# The default of 5 produces good enough results. 10 Approximates very closely +# true LRU but costs a bit more CPU. 3 is very fast but not very accurate. +# +# maxmemory-samples 5 + +############################## APPEND ONLY MODE ############################### + +# By default Redis asynchronously dumps the dataset on disk. This mode is +# good enough in many applications, but an issue with the Redis process or +# a power outage may result into a few minutes of writes lost (depending on +# the configured save points).
+# +# The Append Only File is an alternative persistence mode that provides +# much better durability. For instance using the default data fsync policy +# (see later in the config file) Redis can lose just one second of writes in a +# dramatic event like a server power outage, or a single write if something +# wrong with the Redis process itself happens, but the operating system is +# still running correctly. +# +# AOF and RDB persistence can be enabled at the same time without problems. +# If the AOF is enabled on startup Redis will load the AOF, that is the file +# with the better durability guarantees. +# +# Please check http://redis.io/topics/persistence for more information. + +appendonly no + +# The name of the append only file (default: "appendonly.aof") + +appendfilename "appendonly.aof" + +# The fsync() call tells the Operating System to actually write data on disk +# instead of waiting for more data in the output buffer. Some OS will really flush +# data on disk, some other OS will just try to do it ASAP. +# +# Redis supports three different modes: +# +# no: don't fsync, just let the OS flush the data when it wants. Faster. +# always: fsync after every write to the append only log. Slow, Safest. +# everysec: fsync only one time every second. Compromise. +# +# The default is "everysec", as that's usually the right compromise between +# speed and data safety. It's up to you to understand if you can relax this to +# "no" that will let the operating system flush the output buffer when +# it wants, for better performances (but if you can live with the idea of +# some data loss consider the default persistence mode that's snapshotting), +# or on the contrary, use "always" that's very slow but a bit safer than +# everysec. +# +# More details please check the following article: +# http://antirez.com/post/redis-persistence-demystified.html +# +# If unsure, use "everysec". 
+ +# appendfsync always +appendfsync everysec +# appendfsync no + +# When the AOF fsync policy is set to always or everysec, and a background +# saving process (a background save or AOF log background rewriting) is +# performing a lot of I/O against the disk, in some Linux configurations +# Redis may block too long on the fsync() call. Note that there is no fix for +# this currently, as even performing fsync in a different thread will block +# our synchronous write(2) call. +# +# In order to mitigate this problem it's possible to use the following option +# that will prevent fsync() from being called in the main process while a +# BGSAVE or BGREWRITEAOF is in progress. +# +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync none". In practical terms, this means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the +# default Linux settings). +# +# If you have latency problems turn this to "yes". Otherwise leave it as +# "no" that is the safest pick from the point of view of durability. + +no-appendfsync-on-rewrite no + +# Automatic rewrite of the append only file. +# Redis is able to automatically rewrite the log file implicitly calling +# BGREWRITEAOF when the AOF log size grows by the specified percentage. +# +# This is how it works: Redis remembers the size of the AOF file after the +# latest rewrite (if no rewrite has happened since the restart, the size of +# the AOF at startup is used). +# +# This base size is compared to the current size. If the current size is +# bigger than the specified percentage, the rewrite is triggered. Also +# you need to specify a minimal size for the AOF file to be rewritten, this +# is useful to avoid rewriting the AOF file even if the percentage increase +# is reached but it is still pretty small. +# +# Specify a percentage of zero in order to disable the automatic AOF +# rewrite feature. 
+ +auto-aof-rewrite-percentage 100 +auto-aof-rewrite-min-size 64mb + +# An AOF file may be found to be truncated at the end during the Redis +# startup process, when the AOF data gets loaded back into memory. +# This may happen when the system where Redis is running +# crashes, especially when an ext4 filesystem is mounted without the +# data=ordered option (however this can't happen when Redis itself +# crashes or aborts but the operating system still works correctly). +# +# Redis can either exit with an error when this happens, or load as much +# data as possible (the default now) and start if the AOF file is found +# to be truncated at the end. The following option controls this behavior. +# +# If aof-load-truncated is set to yes, a truncated AOF file is loaded and +# the Redis server starts emitting a log to inform the user of the event. +# Otherwise if the option is set to no, the server aborts with an error +# and refuses to start. When the option is set to no, the user must +# fix the AOF file using the "redis-check-aof" utility before restarting +# the server. +# +# Note that if the AOF file is found to be corrupted in the middle +# the server will still exit with an error. This option only applies when +# Redis tries to read more data from the AOF file but not enough bytes +# are found. +aof-load-truncated yes + +################################ LUA SCRIPTING ############################### + +# Max execution time of a Lua script in milliseconds. +# +# If the maximum execution time is reached Redis will log that a script is +# still in execution after the maximum allowed time and will start to +# reply to queries with an error. +# +# When a long running script exceeds the maximum execution time only the +# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be +# used to stop a script that has not yet called write commands.
The second +# is the only way to shut down the server in the case a write command was +# already issued by the script but the user doesn't want to wait for the natural +# termination of the script. +# +# Set it to 0 or a negative value for unlimited execution without warnings. +lua-time-limit 5000 + +################################ REDIS CLUSTER ############################### +# +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however +# in order to mark it as "mature" we need to wait for a non trivial percentage +# of users to deploy it in production. +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# +# Normal Redis instances can't be part of a Redis Cluster; only nodes that are +# started as cluster nodes can. In order to start a Redis instance as a +# cluster node enable the cluster support uncommenting the following: +# +# cluster-enabled yes + +# Every cluster node has a cluster configuration file. This file is not +# intended to be edited by hand. It is created and updated by Redis nodes. +# Every Redis Cluster node requires a different cluster configuration file. +# Make sure that instances running in the same system do not have +# overlapping cluster configuration file names. +# +# cluster-config-file nodes-6379.conf + +# Cluster node timeout is the amount of milliseconds a node must be unreachable +# for it to be considered in failure state. +# Most other internal time limits are multiple of the node timeout. +# +# cluster-node-timeout 15000 + +# A slave of a failing master will avoid to start a failover if its data +# looks too old. 
+# +# There is no simple way for a slave to actually have a exact measure of +# its "data age", so the following two checks are performed: +# +# 1) If there are multiple slaves able to failover, they exchange messages +# in order to try to give an advantage to the slave with the best +# replication offset (more data from the master processed). +# Slaves will try to get their rank by offset, and apply to the start +# of the failover a delay proportional to their rank. +# +# 2) Every single slave computes the time of the last interaction with +# its master. This can be the last ping or command received (if the master +# is still in the "connected" state), or the time that elapsed since the +# disconnection with the master (if the replication link is currently down). +# If the last interaction is too old, the slave will not try to failover +# at all. +# +# The point "2" can be tuned by user. Specifically a slave will not perform +# the failover if, since the last interaction with the master, the time +# elapsed is greater than: +# +# (node-timeout * slave-validity-factor) + repl-ping-slave-period +# +# So for example if node-timeout is 30 seconds, and the slave-validity-factor +# is 10, and assuming a default repl-ping-slave-period of 10 seconds, the +# slave will not try to failover if it was not able to talk with the master +# for longer than 310 seconds. +# +# A large slave-validity-factor may allow slaves with too old data to failover +# a master, while a too small value may prevent the cluster from being able to +# elect a slave at all. +# +# For maximum availability, it is possible to set the slave-validity-factor +# to a value of 0, which means, that slaves will always try to failover the +# master regardless of the last time they interacted with the master. +# (However they'll always try to apply a delay proportional to their +# offset rank). 
+# +# Zero is the only value able to guarantee that when all the partitions heal +# the cluster will always be able to continue. +# +# cluster-slave-validity-factor 10 + +# Cluster slaves are able to migrate to orphaned masters, that are masters +# that are left without working slaves. This improves the cluster ability +# to resist to failures as otherwise an orphaned master can't be failed over +# in case of failure if it has no working slaves. +# +# Slaves migrate to orphaned masters only if there are still at least a +# given number of other working slaves for their old master. This number +# is the "migration barrier". A migration barrier of 1 means that a slave +# will migrate only if there is at least 1 other working slave for its master +# and so forth. It usually reflects the number of slaves you want for every +# master in your cluster. +# +# Default is 1 (slaves migrate only if their masters remain with at least +# one slave). To disable migration just set it to a very large value. +# A value of 0 can be set but is useful only for debugging and dangerous +# in production. +# +# cluster-migration-barrier 1 + +# By default Redis Cluster nodes stop accepting queries if they detect there +# is at least an hash slot uncovered (no available node is serving it). +# This way if the cluster is partially down (for example a range of hash slots +# are no longer covered) all the cluster becomes, eventually, unavailable. +# It automatically returns available as soon as all the slots are covered again. +# +# However sometimes you want the subset of the cluster which is working, +# to continue to accept queries for the part of the key space that is still +# covered. In order to do so, just set the cluster-require-full-coverage +# option to no. +# +# cluster-require-full-coverage yes + +# In order to setup your cluster make sure to read the documentation +# available at http://redis.io web site. 
+ +################################## SLOW LOG ################################### + +# The Redis Slow Log is a system to log queries that exceeded a specified +# execution time. The execution time does not include the I/O operations +# like talking with the client, sending the reply and so forth, +# but just the time needed to actually execute the command (this is the only +# stage of command execution where the thread is blocked and can not serve +# other requests in the meantime). +# +# You can configure the slow log with two parameters: one tells Redis +# what is the execution time, in microseconds, to exceed in order for the +# command to get logged, and the other parameter is the length of the +# slow log. When a new command is logged the oldest one is removed from the +# queue of logged commands. + +# The following time is expressed in microseconds, so 1000000 is equivalent +# to one second. Note that a negative number disables the slow log, while +# a value of zero forces the logging of every command. +slowlog-log-slower-than 10000 + +# There is no limit to this length. Just be aware that it will consume memory. +# You can reclaim memory used by the slow log with SLOWLOG RESET. +slowlog-max-len 128 + +################################ LATENCY MONITOR ############################## + +# The Redis latency monitoring subsystem samples different operations +# at runtime in order to collect data related to possible sources of +# latency of a Redis instance. +# +# Via the LATENCY command this information is available to the user that can +# print graphs and obtain reports. +# +# The system only logs operations that were performed in a time equal or +# greater than the amount of milliseconds specified via the +# latency-monitor-threshold configuration directive. When its value is set +# to zero, the latency monitor is turned off. 
+# +# By default latency monitoring is disabled since it is mostly not needed +# if you don't have latency issues, and collecting data has a performance +# impact, that while very small, can be measured under big load. Latency +# monitoring can easily be enabled at runtime using the command +# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed. +latency-monitor-threshold 0 + +############################# EVENT NOTIFICATION ############################## + +# Redis can notify Pub/Sub clients about events happening in the key space. +# This feature is documented at http://redis.io/topics/notifications +# +# For instance if keyspace events notification is enabled, and a client +# performs a DEL operation on key "foo" stored in the Database 0, two +# messages will be published via Pub/Sub: +# +# PUBLISH __keyspace@0__:foo del +# PUBLISH __keyevent@0__:del foo +# +# It is possible to select the events that Redis will notify among a set +# of classes. Every class is identified by a single character: +# +# K Keyspace events, published with __keyspace@<db>__ prefix. +# E Keyevent events, published with __keyevent@<db>__ prefix. +# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... +# $ String commands +# l List commands +# s Set commands +# h Hash commands +# z Sorted set commands +# x Expired events (events generated every time a key expires) +# e Evicted events (events generated when a key is evicted for maxmemory) +# A Alias for g$lshzxe, so that the "AKE" string means all the events. +# +# The "notify-keyspace-events" takes as argument a string that is composed +# of zero or multiple characters. The empty string means that notifications +# are disabled.
+# +# Example: to enable list and generic events, from the point of view of the +# event name, use: +# +# notify-keyspace-events Elg +# +# Example 2: to get the stream of the expired keys subscribing to channel +# name __keyevent@0__:expired use: +# +# notify-keyspace-events Ex +# +# By default all notifications are disabled because most users don't need +# this feature and the feature has some overhead. Note that if you don't +# specify at least one of K or E, no events will be delivered. +notify-keyspace-events "" + +############################### ADVANCED CONFIG ############################### + +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 + +# Lists are also encoded in a special way to save a lot of space. +# The number of entries allowed per internal list node can be specified +# as a fixed maximum size or a maximum number of elements. +# For a fixed maximum size, use -5 through -1, meaning: +# -5: max size: 64 Kb <-- not recommended for normal workloads +# -4: max size: 32 Kb <-- not recommended +# -3: max size: 16 Kb <-- probably not recommended +# -2: max size: 8 Kb <-- good +# -1: max size: 4 Kb <-- good +# Positive numbers mean store up to _exactly_ that number of elements +# per list node. +# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), +# but if your use case is unique, adjust the settings as necessary. +list-max-ziplist-size -2 + +# Lists may also be compressed. +# Compress depth is the number of quicklist ziplist nodes from *each* side of +# the list to *exclude* from compression. The head and tail of the list +# are always uncompressed for fast push/pop operations. 
Settings are: +# 0: disable all list compression +# 1: depth 1 means "don't start compressing until after 1 node into the list, +# going from either the head or tail" +# So: [head]->node->node->...->node->[tail] +# [head], [tail] will always be uncompressed; inner nodes will compress. +# 2: [head]->[next]->node->node->...->node->[prev]->[tail] +# 2 here means: don't compress head or head->next or tail->prev or tail, +# but compress all nodes between them. +# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] +# etc. +list-compress-depth 0 + +# Sets have a special encoding in just one case: when a set is composed +# of just strings that happen to be integers in radix 10 in the range +# of 64 bit signed integers. +# The following configuration setting sets the limit in the size of the +# set in order to use this special memory saving encoding. +set-max-intset-entries 512 + +# Similarly to hashes and lists, sorted sets are also specially encoded in +# order to save a lot of space. This encoding is only used when the length and +# elements of a sorted set are below the following limits: +zset-max-ziplist-entries 128 +zset-max-ziplist-value 64 + +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is converted into the dense representation. +# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. +# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. The value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. 
+hll-sparse-max-bytes 3000 + +# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in +# order to help rehashing the main Redis hash table (the one mapping top-level +# keys to values). The hash table implementation Redis uses (see dict.c) +# performs a lazy rehashing: the more operations you run on a hash table +# that is rehashing, the more rehashing "steps" are performed, so if the +# server is idle the rehashing is never complete and some more memory is used +# by the hash table. +# +# The default is to use this millisecond 10 times every second in order to +# actively rehash the main dictionaries, freeing memory when possible. +# +# If unsure: +# use "activerehashing no" if you have hard latency requirements and it is +# not a good thing in your environment that Redis can reply from time to time +# to queries with 2 milliseconds delay. +# +# use "activerehashing yes" if you don't have such hard requirements but +# want to free memory asap when possible. +activerehashing yes + +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub client can't consume messages as fast as the +# publisher can produce them). +# +# The limit can be set differently for the three different classes of clients: +# +# normal -> normal clients including MONITOR clients +# slave -> slave clients +# pubsub -> clients subscribed to at least one pubsub channel or pattern +# +# The syntax of every client-output-buffer-limit directive is the following: +# +# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds> +# +# A client is immediately disconnected once the hard limit is reached, or if +# the soft limit is reached and remains reached for the specified number of +# seconds (continuously).
+# So for instance if the hard limit is 32 megabytes and the soft limit is +# 16 megabytes / 10 seconds, the client will get disconnected immediately +# if the size of the output buffers reach 32 megabytes, but will also get +# disconnected if the client reaches 16 megabytes and continuously overcomes +# the limit for 10 seconds. +# +# By default normal clients are not limited because they don't receive data +# without asking (in a push way), but just after a request, so only +# asynchronous clients may create a scenario where data is requested faster +# than it can read. +# +# Instead there is a default limit for pubsub and slave clients, since +# subscribers and slaves receive data in a push fashion. +# +# Both the hard or the soft limit can be disabled by setting them to zero. +client-output-buffer-limit normal 0 0 0 +client-output-buffer-limit slave 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + +# Redis calls an internal function to perform many background tasks, like +# closing connections of clients in timeout, purging expired keys that are +# never requested, and so forth. +# +# Not all tasks are performed with the same frequency, but Redis checks for +# tasks to perform according to the specified "hz" value. +# +# By default "hz" is set to 10. Raising the value will use more CPU when +# Redis is idle, but at the same time will make Redis more responsive when +# there are many keys expiring at the same time, and timeouts may be +# handled with more precision. +# +# The range is between 1 and 500, however a value over 100 is usually not +# a good idea. Most users should use the default of 10 and raise this up to +# 100 only in environments where very low latency is required. +hz 10 + +# When a child rewrites the AOF file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. 
+aof-rewrite-incremental-fsync yes diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..a5fb2356 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,16 @@ +appdirs==1.4.3 +click==6.7 +gym==0.9.4 +h5py==2.7.0 +mujoco-py==0.5.7 +numpy==1.12.1 +packaging==16.8 +pyglet==1.2.4 +PyOpenGL==3.1.0 +pyparsing==2.2.0 +redis==2.10.5 +requests==2.14.2 +six==1.10.0 +tensorflow==0.12.1 +Werkzeug==0.12.1 +atari-py==0.1.1 diff --git a/scripts/dependency.sh b/scripts/dependency.sh new file mode 100644 index 00000000..a87d6c05 --- /dev/null +++ b/scripts/dependency.sh @@ -0,0 +1,117 @@ +#!/bin/bash + +# from ami-d8bdebb8 + +set -x + +sudo apt-get update +sudo apt-get install -y build-essential cmake git wget htop + +# Build and install a new version of redis +# https://www.digitalocean.com/community/tutorials/how-to-install-and-configure-redis-on-ubuntu-16-04 +wget --quiet http://download.redis.io/releases/redis-3.2.7.tar.gz -O redis-3.2.7.tar.gz +tar -xvzf redis-3.2.7.tar.gz +cd redis-3.2.7 +make +sudo make install +sudo mkdir /etc/redis +sudo cp redis.conf /etc/redis +cd .. +rm -rf redis-3.2.7 redis-3.2.7.tar.gz + +# Set up redis working directory +sudo sed -ie 's/dir \.\//dir \/var\/lib\/redis/' /etc/redis/redis.conf +sudo mkdir /var/lib/redis +sudo chown ubuntu:ubuntu /var/lib/redis + +# rely on firewall for security +sudo sed -ie "s/bind 127.0.0.1//" /etc/redis/redis.conf +sudo sed -ie "s/protected-mode yes/protected-mode no/" /etc/redis/redis.conf + +# System settings for redis +echo "vm.overcommit_memory=1" | sudo tee -a /etc/sysctl.conf +sudo sysctl vm.overcommit_memory=1 +sudo apt-get install -y hugepages +echo "sudo hugeadm --thp-never" | sudo tee /etc/profile.d/disable_thp.sh > /dev/null +. 
/etc/profile.d/disable_thp.sh + +# Start redis with systemctl +# sudo sed -ie "s/supervised no/supervised systemd/" /etc/redis/redis.conf +# ^ doesn't seem to matter; if it's enabled, the logs show "systemd supervision requested, but NOTIFY_SOCKET not found" +echo " +[Unit] +Description=Redis In-Memory Data Store +After=network.target + +[Service] +User=ubuntu +Group=ubuntu +ExecStart=/usr/local/bin/redis-server /etc/redis/redis.conf +ExecStop=/usr/local/bin/redis-cli shutdown +Restart=always + +[Install] +WantedBy=multi-user.target +" | sudo tee /etc/systemd/system/redis.service > /dev/null +sudo systemctl start redis + +# anaconda +sudo echo 'export PATH=/opt/conda/bin:$PATH' | sudo tee /etc/profile.d/conda.sh > /dev/null +sudo wget --quiet https://repo.continuum.io/archive/Anaconda3-4.2.0-Linux-x86_64.sh -O ~/anaconda.sh +sudo /bin/bash ~/anaconda.sh -b -p /opt/conda +sudo rm -f ~/anaconda.sh +. /etc/profile.d/conda.sh +sudo /opt/conda/bin/conda update -y --all + +# additional python dependencies +sudo /opt/conda/bin/conda install -y numpy scipy opencv + +# Mujoco +sudo mkdir -p /opt/mujoco + +####################################################### +# WRITE CODE HERE TO PLACE MUJOCO 1.31 in /opt/mujoco # +# The key file should be in /opt/mujoco/mjkey.txt # +# Mujoco should be installed in /opt/mujoco/mjpro131 # +####################################################### + +sudo echo 'export MUJOCO_PY_MJKEY_PATH=/opt/mujoco/mjkey.txt' | sudo tee /etc/profile.d/mujoco.sh > /dev/null +sudo echo 'export MUJOCO_PY_MJPRO_PATH=/opt/mujoco/mjpro131' | sudo tee -a /etc/profile.d/mujoco.sh > /dev/null +. /etc/profile.d/mujoco.sh + +# ALE +sudo /opt/conda/bin/conda install -y libgcc # ALE needs this for some reason +sudo apt-get install -y libsdl1.2-dev +git clone https://github.com/mgbellemare/Arcade-Learning-Environment +cd Arcade-Learning-Environment +mkdir build +cd build +cmake .. -DUSE_SDL=on +make +cd .. +sudo /opt/conda/bin/pip install . +cd .. 
+rm -rf Arcade-Learning-Environment + +# Tensorflow 0.11.0 +sudo /opt/conda/bin/conda install -c conda-forge -y tensorflow=0.11.0 + +# Gym +sudo apt-get install -y zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev libboost-all-dev libsdl2-dev swig freeglut3 libgl1 libglu1 +sudo /opt/conda/bin/conda install -y pyopengl +sudo /opt/conda/bin/pip install \ + 'gym[atari,classic_control,mujoco]' \ + PyYAML \ + six==1.10.0 \ + awscli + +# pip redis +sudo /opt/conda/bin/pip install redis + +# cleanup +sudo /opt/conda/bin/conda clean -y --all +sudo apt-get clean +sudo rm -rf /var/lib/apt/lists/* +# cleanup pip cache? + +set +x diff --git a/scripts/ec2ctl b/scripts/ec2ctl new file mode 100755 index 00000000..7721d176 --- /dev/null +++ b/scripts/ec2ctl @@ -0,0 +1,317 @@ +#!/usr/bin/env python + +import json +import logging +import multiprocessing +import os +import sys + +import boto3 +import click + +aws_access_key_id = os.environ.get("AWS_ACCESS_KEY", None) +aws_secret_access_key = os.environ.get("AWS_ACCESS_SECRET", None) +REGIONS = [ + "us-west-1", +] + +def highlight(x): + if not isinstance(x, str): + x = json.dumps(x, sort_keys=True, indent=2) + click.secho(x, fg='green') + +DEBUG_LOGGING_MAP = { + 0: logging.CRITICAL, + 1: logging.WARNING, + 2: logging.INFO, + 3: logging.DEBUG +} + + +@click.group() +@click.option('--verbose', '-v', + help="Sets the debug noise level, specify multiple times " + "for more verbosity.", + type=click.IntRange(0, 3, clamp=True), + count=True) +@click.pass_context +def cli(ctx, verbose): + logger_handler = logging.StreamHandler(sys.stderr) + logger_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) + logging.getLogger().addHandler(logger_handler) + logging.getLogger().setLevel(DEBUG_LOGGING_MAP.get(verbose, logging.DEBUG)) + + +def get_clients(): + regions = REGIONS + clients = [] + for region in regions: + client = boto3.client( + "ec2", + region_name=region, + aws_access_key_id=aws_access_key_id, 
+ aws_secret_access_key=aws_secret_access_key, + ) + client.region = region + clients.append(client) + scaling_clients = [] + for region in regions: + client = boto3.client( + "autoscaling", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + client.region = region + scaling_clients.append(client) + return zip(clients, scaling_clients) + + +def _collect_instances(region): + client = boto3.client( + "ec2", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + print("Collecting instances in region", region) + instances = [x['Instances'][0] for x in client.describe_instances( + Filters=[ + { + 'Name': 'tag:es_dist_role', + 'Values': [ + "master" + ] + }, + { + 'Name': 'instance-state-name', + 'Values': [ + 'running' + ] + }, + ] + )['Reservations']] + for instance in instances: + instance['Region'] = region + return instances + +def _collect_scaling_groups(region): + client = boto3.client( + "autoscaling", + region_name=region, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + print("Collecting scaling groups in region", region) + resp = client.describe_auto_scaling_groups() + assert "NextToken" not in resp, "did not program to handle pagination" + groups = list(filter( + lambda grp_dict: any(tag["Key"] == "es_dist_role" for tag in grp_dict["Tags"]), + resp["AutoScalingGroups"] + )) + for group in groups: + group["Region"] = region + return groups + + +def get_all_instances(): + # with multiprocessing.Pool(10) as pool: + # all_instances = sum(pool.map(_collect_instances, REGIONS), []) + all_instances = sum(map(_collect_instances, REGIONS), []) + + return all_instances + +def get_all_scaling_groups(): + # with multiprocessing.Pool(10) as pool: + # all_instances = sum(pool.map(_collect_scaling_groups, REGIONS), []) + all_instances = sum(map(_collect_scaling_groups, REGIONS), []) + + return all_instances + 
+def get_all_x(name): + return eval("get_all_%s" % name)() + +def get_tag(key, stuff): + if 'Tags' in stuff: + try: + tags = stuff['Tags'] + name_tag = [t for t in tags if t['Key'] == key][0] + return name_tag['Value'] + except IndexError: + return None + return None + +def get_name_tag(instance): + return get_tag("Name", instance) + + +def get_exp_prefix_tag(instance): + return get_tag("exp_prefix", instance) + +def get_exp_name_tag(instance): + return get_tag("exp_name", instance) + +@cli.command() +def jobs(): + jobs = [] + with multiprocessing.Pool(2) as pool: + master_instances, groups = pool.map(get_all_x, ["instances", "scaling_groups"]) + group_map = dict( + (get_exp_name_tag(group), group) for group in groups + ) + for instance in master_instances: + name = get_exp_name_tag(instance) + group = group_map[name] + jobs.append( + "{} (#workers: {}/{})".format( + name, len(group["Instances"]), + group["DesiredCapacity"], + ) + ) + for job in sorted(jobs): + click.secho(job, fg='green') + + +def get_instances_by_pattern(job): + for instance in get_all_instances(): + name = get_name_tag(instance) + if job in name: + yield instance + +def get_groups_by_pattern(job): + for instance in get_all_scaling_groups(): + name = get_name_tag(instance) + if job in name: + yield instance + +@cli.command() +@click.argument('pattern') +def ssh(pattern): + for instance in get_instances_by_pattern(pattern): + name = get_name_tag(instance) + print("Connecting to %s" % name) + ip_addr = instance['PublicIpAddress'] + exp_prefix = get_exp_prefix_tag(instance) + + command = " ".join([ + "ssh", + "-oStrictHostKeyChecking=no", + "-oConnectTimeout=10", + "-t", + "ubuntu@" + ip_addr, + ]) + print(command) + os.system(command) + return + print("Not found!") + +@cli.command() +@click.argument('pattern') +def tail(pattern): + for instance in get_instances_by_pattern(pattern): + name = get_name_tag(instance) + print("Connecting to %s" % name) + ip_addr = instance['PublicIpAddress'] + 
exp_prefix = get_exp_prefix_tag(instance) + command = " ".join([ + "ssh", + "-oStrictHostKeyChecking=no", + "-oConnectTimeout=10", + "-t", + "ubuntu@" + ip_addr, + "'tail -f -n 2000 user_data.log && exec bash -l'" + ]) + print(command) + os.system(command) + return + +@cli.command() +@click.argument('pattern') +@click.argument('size') +def resize(pattern, size): + size = int(size) + groups_to_resize = list(get_groups_by_pattern(pattern)) + if not groups_to_resize: + print("No match found") + return + + names = sorted(map(get_exp_name_tag, groups_to_resize)) + print("This will resize the following jobs to {}:".format(size)) + click.secho("\n".join(names), fg="blue") + click.confirm('Continue?', abort=True) + + for name, group in zip(names, groups_to_resize): + print("Resizing %s to %s" % (name, size)) + client = boto3.client( + "autoscaling", + region_name=group["Region"], + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) + client.update_auto_scaling_group( + AutoScalingGroupName=group["AutoScalingGroupName"], + MinSize=size, + MaxSize=size, + DesiredCapacity=size, + ) + + +@cli.command() +@click.argument('pattern') +def kill(pattern): + to_kill = [] + to_kill_instances = {} + for instance in get_instances_by_pattern(pattern): + region = instance['Region'] + name = get_exp_name_tag(instance) + if region not in to_kill_instances: + to_kill_instances[region] = [] + to_kill_instances[region].append(instance) + to_kill.append(name) + + print("This will kill the following jobs:") + click.secho("\n".join(sorted(to_kill)), fg="red") + click.confirm('Continue?', abort=True) + + for ec2_client, scaling_client in get_clients(): + print("Terminating instances in region", ec2_client.region) + instances = to_kill_instances.get(ec2_client.region, []) + if len(instances) > 0: + for instance in instances: + exp_name = get_exp_name_tag(instance) + print("Cleaning up cleaning group and config for %s" % exp_name) + _ = 
scaling_client.delete_auto_scaling_group( + AutoScalingGroupName=exp_name, + ForceDelete=True, + ) + _ = scaling_client.delete_launch_configuration( + LaunchConfigurationName=exp_name + ) + ec2_client.terminate_instances( + InstanceIds=[instance["InstanceId"] for instance in instances] + ) + + +@cli.command() +@click.argument('pattern') +@click.argument('dest') +@click.option('--dry-run', is_flag=True) +def sync(pattern, dest, dry_run): + for instance in get_instances_by_pattern(pattern): + name = get_name_tag(instance) + highlight(name) + cmd = ( + "rsync {dry_run} -zav --progress " + "-e 'ssh -oStrictHostKeyChecking=no -oConnectTimeout=10' " + "ubuntu@{ip}:~ {dest}" + ).format( + dry_run='--dry-run' if dry_run else '', + ip=instance['PublicIpAddress'], + dest=os.path.join(dest, get_exp_name_tag(instance)) + ) + highlight(cmd) + os.system(cmd) + + +if __name__ == '__main__': + cli() diff --git a/scripts/launch.py b/scripts/launch.py new file mode 100644 index 00000000..db64ca4c --- /dev/null +++ b/scripts/launch.py @@ -0,0 +1,300 @@ +import datetime +import json +import os + +import click + +AMI_MAP = { + "us-west-1": "FILL IN YOUR AMI HERE", +} + + +def highlight(x): + if not isinstance(x, str): + x = json.dumps(x, sort_keys=True, indent=2) + click.secho(x, fg='green') + + +def upload_archive(exp_name, archive_excludes, s3_bucket): + import hashlib, os.path as osp, subprocess, tempfile, uuid, sys + + # Archive this package + thisfile_dir = osp.dirname(osp.abspath(__file__)) + pkg_parent_dir = osp.abspath(osp.join(thisfile_dir, '..', '..')) + pkg_subdir = osp.basename(osp.abspath(osp.join(thisfile_dir, '..'))) + assert osp.abspath(__file__) == osp.join(pkg_parent_dir, pkg_subdir, 'scripts', 'launch.py'), 'You moved me!' 
+ + # Run tar + tmpdir = tempfile.TemporaryDirectory() + local_archive_path = osp.join(tmpdir.name, '{}.tar.gz'.format(uuid.uuid4())) + tar_cmd = ["tar", "-zcvf", local_archive_path, "-C", pkg_parent_dir] + for pattern in archive_excludes: + tar_cmd += ["--exclude", pattern] + tar_cmd += ["-h", pkg_subdir] + highlight(" ".join(tar_cmd)) + + if sys.platform == 'darwin': + # Prevent Mac tar from adding ._* files + env = os.environ.copy() + env['COPYFILE_DISABLE'] = '1' + subprocess.check_call(tar_cmd, env=env) + else: + subprocess.check_call(tar_cmd) + + # Construct remote path to place the archive on S3 + with open(local_archive_path, 'rb') as f: + archive_hash = hashlib.sha224(f.read()).hexdigest() + remote_archive_path = '{}/{}_{}.tar.gz'.format(s3_bucket, exp_name, archive_hash) + + # Upload + upload_cmd = ["aws", "s3", "cp", local_archive_path, remote_archive_path] + highlight(" ".join(upload_cmd)) + subprocess.check_call(upload_cmd) + + presign_cmd = ["aws", "s3", "presign", remote_archive_path, "--expires-in", str(60 * 60 * 24 * 30)] + highlight(" ".join(presign_cmd)) + remote_url = subprocess.check_output(presign_cmd).decode("utf-8").strip() + return remote_url + + +def make_disable_hyperthreading_script(): + return """ +# disable hyperthreading +# https://forums.aws.amazon.com/message.jspa?messageID=189757 +for cpunum in $( + cat /sys/devices/system/cpu/cpu*/topology/thread_siblings_list | + sed 's/-/,/g' | cut -s -d, -f2- | tr ',' '\n' | sort -un); do + echo 0 > /sys/devices/system/cpu/cpu$cpunum/online +done +""" + + +def make_download_and_run_script(code_url, cmd): + return """su -l ubuntu <<'EOF' +set -x +cd ~ +wget --quiet "{code_url}" -O code.tar.gz +tar xvaf code.tar.gz +rm code.tar.gz +cd es-distributed +{cmd} +EOF +""".format(code_url=code_url, cmd=cmd) + + +def make_master_script(code_url, exp_str): + cmd = """ +cat > ~/experiment.json <<< '{exp_str}' +python -m es_distributed.main master \ + --master_socket_path /var/run/redis/redis.sock \ + 
--log_dir ~ \ + --exp_file ~/experiment.json + """.format(exp_str=exp_str) + return """#!/bin/bash +{ +set -x + +%s + +# Disable redis snapshots +echo 'save ""' >> /etc/redis/redis.conf + +# Make the unix domain socket available for the master client +# (TCP is still enabled for workers/relays) +echo "unixsocket /var/run/redis/redis.sock" >> /etc/redis/redis.conf +echo "unixsocketperm 777" >> /etc/redis/redis.conf +mkdir -p /var/run/redis +chown ubuntu:ubuntu /var/run/redis + +systemctl restart redis + +%s +} >> /home/ubuntu/user_data.log 2>&1 +""" % (make_disable_hyperthreading_script(), make_download_and_run_script(code_url, cmd)) + + +def make_worker_script(code_url, master_private_ip): + cmd = ("MKL_NUM_THREADS=1 OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 " + "python -m es_distributed.main workers " + "--master_host {} " + "--relay_socket_path /var/run/redis/redis.sock").format(master_private_ip) + return """#!/bin/bash +{ +set -x + +%s + +# Disable redis snapshots +echo 'save ""' >> /etc/redis/redis.conf + +# Make redis use a unix domain socket and disable TCP sockets +sed -ie "s/port 6379/port 0/" /etc/redis/redis.conf +echo "unixsocket /var/run/redis/redis.sock" >> /etc/redis/redis.conf +echo "unixsocketperm 777" >> /etc/redis/redis.conf +mkdir -p /var/run/redis +chown ubuntu:ubuntu /var/run/redis + +systemctl restart redis + +%s +} >> /home/ubuntu/user_data.log 2>&1 +""" % (make_disable_hyperthreading_script(), make_download_and_run_script(code_url, cmd)) + + +@click.command() +@click.argument('exp_files', nargs=-1, type=click.Path(), required=True) +@click.option('--key_name', default=lambda: os.environ["KEY_NAME"]) +@click.option('--aws_access_key_id', default=os.environ.get("AWS_ACCESS_KEY", None)) +@click.option('--aws_secret_access_key', default=os.environ.get("AWS_ACCESS_SECRET", None)) +@click.option('--archive_excludes', default=(".git", "__pycache__", ".idea", "scratch")) +@click.option('--s3_bucket') +@click.option('--spot_price') 
+@click.option('--region_name') +@click.option('--zone') +@click.option('--cluster_size', type=int, default=1) +@click.option('--spot_master', is_flag=True, help='Use a spot instance as the master') +@click.option('--master_instance_type') +@click.option('--worker_instance_type') +@click.option('--security_group') +@click.option('--yes', is_flag=True, help='Skip confirmation prompt') +def main(exp_files, + key_name, + aws_access_key_id, + aws_secret_access_key, + archive_excludes, + s3_bucket, + spot_price, + region_name, + zone, + cluster_size, + spot_master, + master_instance_type, + worker_instance_type, + security_group, + yes + ): + + highlight('Launching:') + highlight(locals()) + + import boto3 + ec2 = boto3.resource( + "ec2", + region_name=region_name, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key + ) + as_client = boto3.client( + 'autoscaling', + region_name=region_name, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key + ) + + for i_exp_file, exp_file in enumerate(exp_files): + with open(exp_file, 'r') as f: + exp = json.loads(f.read()) + highlight('Experiment [{}/{}]:'.format(i_exp_file + 1, len(exp_files))) + highlight(exp) + if not yes: + click.confirm('Continue?', abort=True) + + exp_prefix = exp['exp_prefix'] + exp_str = json.dumps(exp) + + exp_name = '{}_{}'.format(exp_prefix, datetime.datetime.now().strftime('%Y%m%d-%H%M%S')) + + code_url = upload_archive(exp_name, archive_excludes, s3_bucket) + highlight("code_url: " + code_url) + + image_id = AMI_MAP[region_name] + highlight('Using AMI: {}'.format(image_id)) + + if spot_master: + import base64 + requests = ec2.meta.client.request_spot_instances( + SpotPrice=spot_price, + InstanceCount=1, + LaunchSpecification=dict( + ImageId=image_id, + KeyName=key_name, + InstanceType=master_instance_type, + EbsOptimized=True, + SecurityGroups=[security_group], + Placement=dict( + AvailabilityZone=zone, + ), + 
UserData=base64.b64encode(make_master_script(code_url, exp_str).encode()).decode() + ) + )['SpotInstanceRequests'] + assert len(requests) == 1 + request_id = requests[0]['SpotInstanceRequestId'] + # Wait for fulfillment + highlight('Waiting for spot request {} to be fulfilled'.format(request_id)) + ec2.meta.client.get_waiter('spot_instance_request_fulfilled').wait(SpotInstanceRequestIds=[request_id]) + req = ec2.meta.client.describe_spot_instance_requests(SpotInstanceRequestIds=[request_id]) + master_instance_id = req['SpotInstanceRequests'][0]['InstanceId'] + master_instance = ec2.Instance(master_instance_id) + else: + master_instance = ec2.create_instances( + ImageId=image_id, + KeyName=key_name, + InstanceType=master_instance_type, + EbsOptimized=True, + SecurityGroups=[security_group], + MinCount=1, + MaxCount=1, + Placement=dict( + AvailabilityZone=zone, + ), + UserData=make_master_script(code_url, exp_str) + )[0] + master_instance.create_tags( + Tags=[ + dict(Key="Name", Value=exp_name + "-master"), + dict(Key="es_dist_role", Value="master"), + dict(Key="exp_prefix", Value=exp_prefix), + dict(Key="exp_name", Value=exp_name), + ] + ) + highlight("Master created. 
IP: %s" % master_instance.public_ip_address) + + config_resp = as_client.create_launch_configuration( + ImageId=image_id, + KeyName=key_name, + InstanceType=worker_instance_type, + EbsOptimized=True, + SecurityGroups=[security_group], + LaunchConfigurationName=exp_name, + UserData=make_worker_script(code_url, master_instance.private_ip_address), + SpotPrice=spot_price, + ) + assert config_resp["ResponseMetadata"]["HTTPStatusCode"] == 200 + + asg_resp = as_client.create_auto_scaling_group( + AutoScalingGroupName=exp_name, + LaunchConfigurationName=exp_name, + MinSize=cluster_size, + MaxSize=cluster_size, + DesiredCapacity=cluster_size, + AvailabilityZones=[zone], + Tags=[ + dict(Key="Name", Value=exp_name + "-worker"), + dict(Key="es_dist_role", Value="worker"), + dict(Key="exp_prefix", Value=exp_prefix), + dict(Key="exp_name", Value=exp_name), + ] + # todo: also try placement group to see if there is increased networking performance + ) + assert asg_resp["ResponseMetadata"]["HTTPStatusCode"] == 200 + highlight("Scaling group created") + + highlight("%s launched successfully." % exp_name) + highlight("Manage at %s" % ( + "https://%s.console.aws.amazon.com/ec2/v2/home?region=%s#Instances:sort=tag:Name" % ( + region_name, region_name) + )) + + +if __name__ == '__main__': + main() diff --git a/scripts/local_env_setup.sh b/scripts/local_env_setup.sh new file mode 100644 index 00000000..f8c1c000 --- /dev/null +++ b/scripts/local_env_setup.sh @@ -0,0 +1,3 @@ +# your environment setup for a new shell window +echo Setting up local environment +. env/bin/activate diff --git a/scripts/local_run_exp.sh b/scripts/local_run_exp.sh new file mode 100755 index 00000000..772f9dc9 --- /dev/null +++ b/scripts/local_run_exp.sh @@ -0,0 +1,11 @@ +#!/bin/sh +NAME=exp_`date "+%m_%d_%H_%M_%S"` +ALGO=$1 +EXP_FILE=$2 +tmux new -s $NAME -d +tmux send-keys -t $NAME '. 
scripts/local_env_setup.sh' C-m +tmux send-keys -t $NAME 'python -m es_distributed.main master --master_socket_path /tmp/es_redis_master.sock --algo '$ALGO' --exp_file '"$EXP_FILE" C-m +tmux split-window -t $NAME +tmux send-keys -t $NAME '. scripts/local_env_setup.sh' C-m +tmux send-keys -t $NAME 'python -m es_distributed.main workers --master_host localhost --relay_socket_path /tmp/es_redis_relay.sock --algo '$ALGO' --num_workers 40' C-m +tmux a -t $NAME diff --git a/scripts/local_run_redis.sh b/scripts/local_run_redis.sh new file mode 100755 index 00000000..bcca923e --- /dev/null +++ b/scripts/local_run_redis.sh @@ -0,0 +1,6 @@ +#!/bin/sh +tmux new -s redis -d +tmux send-keys -t redis 'redis-server redis_config/redis_master.conf' C-m +tmux split-window -t redis +tmux send-keys -t redis 'redis-server redis_config/redis_local_mirror.conf' C-m +tmux a -t redis diff --git a/scripts/packer.json b/scripts/packer.json new file mode 100644 index 00000000..f42a01c5 --- /dev/null +++ b/scripts/packer.json @@ -0,0 +1,54 @@ +{ + "variables": { + "aws_access_key": "", + "aws_secret_key": "" + }, + "builders": [ + { + "type": "amazon-ebs", + "access_key": "{{user `aws_access_key`}}", + "secret_key": "{{user `aws_secret_key`}}", + "region": "us-west-1", + "source_ami": "ami-d8bdebb8", + "instance_type": "t2.micro", + "ssh_username": "ubuntu", + "ami_name": "es-dist-{{isotime \"2006-01-02-03-04-05\"}}", + "ami_block_device_mappings": [ + { + "device_name": "/dev/sda1", + "volume_size": 40, + "delete_on_termination": true + } + ], + "launch_block_device_mappings": [ + { + "device_name": "/dev/sda1", + "volume_size": 40, + "delete_on_termination": true + } + ], + "ami_regions": [ + "us-west-1", + "us-west-2", + "us-east-1", + "us-east-2", + "eu-west-1", + "eu-central-1", + "ap-northeast-1", + "ap-northeast-2", + "ap-southeast-1", + "ap-southeast-2", + "ap-south-1", + "sa-east-1" + ] + } + ], + "provisioners": [ + { + "type": "shell", + "scripts": [ + "dependency.sh" + ] + } + ] +} 
# scripts/viz.py  (new file introduced by this patch)
"""Render rollouts of a saved policy in a Gym environment.

Usage: ``python scripts/viz.py ENV_ID POLICY_FILE [--record] [--stochastic]
[--extra_kwargs JSON]``.
"""
import click


@click.command()
@click.argument('env_id')
@click.argument('policy_file')
@click.option('--record', is_flag=True)
@click.option('--stochastic', is_flag=True)
@click.option('--extra_kwargs')
def main(env_id, policy_file, record, stochastic, extra_kwargs):
    """Load POLICY_FILE and render its rollouts in the Gym environment ENV_ID.

    An ENV_ID containing "NoFrameskip" is treated as an Atari environment and
    loaded as an ESAtariPolicy; anything else is loaded as a MujocoPolicy.

    With --record the environment is wrapped in a Gym Monitor writing to a
    fresh directory under /tmp, and the command returns after one rollout.
    Without it, rollouts are rendered in a loop until the process is
    interrupted.

    With --stochastic, numpy's global random module is passed to the policy
    as its random stream; otherwise no random stream is passed.

    --extra_kwargs is a JSON document that is decoded and forwarded to the
    policy loader.
    """
    # These dependencies are imported when the command runs rather than when
    # the module is imported.
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy, ESAtariPolicy
    from es_distributed.atari_wrappers import ScaledFloatFrame, wrap_deepmind
    from es_distributed.es import get_ref_batch
    import numpy as np

    is_atari_policy = "NoFrameskip" in env_id

    env = gym.make(env_id)
    try:
        if is_atari_policy:
            env = ScaledFloatFrame(wrap_deepmind(env))

        if record:
            import uuid
            env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

        if extra_kwargs:
            import json
            extra_kwargs = json.loads(extra_kwargs)

        # Loop-invariant: decided once from the CLI flag.
        random_stream = np.random if stochastic else None

        with tf.Session():
            if is_atari_policy:
                pi = ESAtariPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
                pi.set_ref_batch(get_ref_batch(env, batch_size=128))
            else:
                pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)

            while True:
                rews, t = pi.rollout(env, render=True, random_stream=random_stream)
                print('return={:.4f} len={}'.format(rews.sum(), t))

                if record:
                    # A recording run stops after a single rollout.
                    break
    finally:
        # Always release the environment (and the Monitor wrapper, if any),
        # including when a rollout raises or the endless non-recording loop
        # is interrupted; previously it was closed only after a successful
        # recorded rollout.
        env.close()


if __name__ == '__main__':
    main()