From 9fadeb4bc4493431ec856f5707cb20f4cb9a504c Mon Sep 17 00:00:00 2001 From: Pedro Larroy Date: Wed, 22 Aug 2018 22:22:37 +0200 Subject: [PATCH 1/2] Separate minor refactoring from #12276 in a prior PR --- ci/build.py | 212 +++++++++++++++++++++++---------- ci/docker/runtime_functions.sh | 3 + ci/docker_cache.py | 5 +- 3 files changed, 157 insertions(+), 63 deletions(-) diff --git a/ci/build.py b/ci/build.py index a9d6a63537f2..07c001fdcdc4 100755 --- a/ci/build.py +++ b/ci/build.py @@ -23,31 +23,88 @@ """ __author__ = 'Marco de Abreu, Kellen Sunderland, Anton Chernov, Pedro Larroy' -__version__ = '0.1' +__version__ = '0.2' import argparse import glob import logging -import os import re import shutil import subprocess import sys import tempfile -import platform from copy import deepcopy from itertools import chain -from subprocess import call, check_call +from subprocess import call, check_call, check_output from typing import * from util import * +import pprint +import requests + + +def retry(target_exception, tries=4, delay_s=1, backoff=2): + """Retry calling the decorated function using an exponential backoff. + + http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ + original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry + + :param target_exception: the exception to check. may be a tuple of + exceptions to check + :type target_exception: Exception or tuple + :param tries: number of times to try (not retry) before giving up + :type tries: int + :param delay_s: initial delay between retries in seconds + :type delay_s: int + :param backoff: backoff multiplier e.g. value of 2 will double the delay + each retry + :type backoff: int + """ + import time + from functools import wraps + + def decorated_retry(f): + @wraps(f) + def f_retry(*args, **kwargs): + mtries, mdelay = tries, delay_s + while mtries > 1: + try: + return f(*args, **kwargs) + except target_exception as e: + logging.warning("Exception: %s, Retrying in %d seconds...", str(e), mdelay) + time.sleep(mdelay) + mtries -= 1 + mdelay *= backoff + return f(*args, **kwargs) + + return f_retry # true decorator + + return decorated_retry + CCACHE_MAXSIZE = '500G' + +# noinspection SyntaxError def under_ci() -> bool: """:return: True if we run in Jenkins.""" return 'JOB_NAME' in os.environ -def get_platforms(path: Optional[str] = "docker"): + +def git_cleanup() -> None: + """Clean repo and subrepos, update subrepos""" + logging.info("cleaning up repository") + with remember_cwd(): + os.chdir(get_mxnet_root()) + check_call(['git', 'clean', '-ffdx']) + check_call(['git', 'submodule', 'foreach', '--recursive', 'git', 'clean', '-ffdx']) + check_call(['git', 'submodule', 'update', '--recursive', '--init']) + + +def get_dockerfiles_path(): + return "docker" + + +def get_platforms(path: str = get_dockerfiles_path()) -> List[str]: """Get a list of architectures given our dockerfiles""" dockerfiles = glob.glob(os.path.join(path, "Dockerfile.build.*")) dockerfiles = list(filter(lambda x: x[-1] != '~', dockerfiles)) @@ -57,10 +114,11 @@ def get_platforms(path: Optional[str] = "docker"): def get_docker_tag(platform: str, registry: str) -> str: + """:return: docker tag to be used for the container""" return "{0}/build.{1}".format(registry, platform) -def get_dockerfile(platform: str, path="docker") -> str: +def get_dockerfile(platform: str, path=get_dockerfiles_path()) -> str: return os.path.join(path, "Dockerfile.build.{0}".format(platform)) @@ -68,18 +126,18 @@ def get_docker_binary(use_nvidia_docker: bool) -> str: return "nvidia-docker" if use_nvidia_docker else "docker" -def build_docker(platform: str, docker_binary: str, registry: str, num_retries: int) -> None: +def build_docker(platform: str, docker_binary: str, registry: str, num_retries: int, use_cache: bool) -> str: """ Build a container for the given platform :param platform: Platform :param docker_binary: docker binary to use (docker/nvidia-docker) :param registry: Dockerhub registry name :param num_retries: Number of retries to build the docker image + :param use_cache: will pass cache_from to docker to use the previously pulled tag :return: Id of the top level image """ - tag = get_docker_tag(platform=platform, registry=registry) - logging.info("Building container tagged '%s' with %s", tag, docker_binary) + logging.info("Building docker container tagged '%s' with %s", tag, docker_binary) # # We add a user with the same group as the executing non-root user so files created in the # container match permissions of the local user. Same for the group. @@ -91,40 +149,24 @@ def build_docker(platform: str, docker_binary: str, registry: str, num_retries: # docker pull see: docker_cache.load_docker_cache # # This doesn't work with multi head docker files. - # - - for i in range(num_retries): - logging.info('%d out of %d tries to build the docker image.', i + 1, num_retries) - - cmd = [docker_binary, "build", - "-f", get_dockerfile(platform), - "--build-arg", "USER_ID={}".format(os.getuid()), - "--build-arg", "GROUP_ID={}".format(os.getgid()), - "--cache-from", tag, - "-t", tag, - "docker"] + # + cmd = [docker_binary, "build", + "-f", get_dockerfile(platform), + "--build-arg", "USER_ID={}".format(os.getuid()), + "--build-arg", "GROUP_ID={}".format(os.getgid())] + if use_cache: + cmd.extend(["--cache-from", tag]) + cmd.extend(["-t", tag, get_dockerfiles_path()]) + + @retry(subprocess.CalledProcessError, tries=num_retries) + def run_cmd(): logging.info("Running command: '%s'", ' '.join(cmd)) - try: - check_call(cmd) - # Docker build was successful. Call break to break out of the retry mechanism - break - except subprocess.CalledProcessError as e: - saved_exception = e - logging.error('Failed to build docker image') - # Building the docker image failed. Call continue to trigger the retry mechanism - continue - else: - # Num retries exceeded - logging.exception('Exception during build of docker image', saved_exception) - logging.fatal('Failed to build the docker image, aborting...') - sys.exit(1) + check_call(cmd) + run_cmd() # Get image id by reading the tag. It's guaranteed (except race condition) that the tag exists. Otherwise, the # check_call would have failed - image_id = _get_local_image_id(docker_binary=docker_binary, docker_tag=tag) - if not image_id: - raise FileNotFoundError('Unable to find docker image id matching with {}'.format(tag)) - return image_id + return _get_local_image_id(docker_binary=docker_binary, docker_tag=tag) def _get_local_image_id(docker_binary, docker_tag): @@ -134,14 +176,17 @@ def _get_local_image_id(docker_binary, docker_tag): :return: Image id as string or None if tag does not exist """ cmd = [docker_binary, "images", "-q", docker_tag] - image_id_b = subprocess.check_output(cmd) + image_id_b = check_output(cmd) image_id = image_id_b.decode('utf-8').strip() + if not image_id: + raise RuntimeError('Unable to find docker image id matching with tag {}'.format(docker_tag)) return image_id def buildir() -> str: return os.path.join(get_mxnet_root(), "build") + def default_ccache_dir() -> str: # Share ccache across containers if 'CCACHE_DIR' in os.environ: @@ -152,6 +197,7 @@ def default_ccache_dir() -> str: except PermissionError: logging.info('Unable to make dirs at %s, falling back to local temp dir', ccache_dir) # In osx tmpdir is not mountable by default + import platform if platform.system() == 'Darwin': ccache_dir = "/tmp/_mxnet_ccache" os.makedirs(ccache_dir, exist_ok=True) @@ -166,7 +212,7 @@ def container_run(platform: str, local_ccache_dir: str, command: List[str], dry_run: bool = False, - interactive: bool = False) -> str: + interactive: bool = False) -> int: tag = get_docker_tag(platform=platform, registry=docker_registry) mx_root = get_mxnet_root() local_build_folder = buildir() @@ -193,15 +239,13 @@ def container_run(platform: str, logging.info("Executing:\n%s\n", cmd) ret = call(runlist) - docker_run_cmd = ' '.join(runlist) if not dry_run and interactive: into_cmd = deepcopy(runlist) # -ti can't be after the tag, as is interpreted as a command so hook it up after the -u argument idx = into_cmd.index('-u') + 2 into_cmd[idx:idx] = ['-ti'] - cmd = '\\\n\t'.join(into_cmd) + cmd = ' \\\n\t'.join(into_cmd) logging.info("Executing:\n%s\n", cmd) - docker_run_cmd = ' '.join(into_cmd) ret = call(into_cmd) if not dry_run and not interactive and ret != 0: @@ -209,11 +253,12 @@ def container_run(platform: str, logging.error("You can get into the container by adding the -i option") raise subprocess.CalledProcessError(ret, cmd) - return docker_run_cmd + return ret def list_platforms() -> str: - print("\nSupported platforms:\n{}".format('\n'.join(get_platforms()))) + return "\nSupported platforms:\n{}".format('\n'.join(get_platforms())) + def load_docker_cache(tag, docker_registry) -> None: if docker_registry: @@ -221,24 +266,59 @@ def load_docker_cache(tag, docker_registry) -> None: import docker_cache logging.info('Docker cache download is enabled from registry %s', docker_registry) docker_cache.load_docker_cache(registry=docker_registry, docker_tag=tag) + # noinspection PyBroadException except Exception: logging.exception('Unable to retrieve Docker cache. Continue without...') else: logging.info('Distributed docker cache disabled') -def main() -> int: + +def ec2_instance_id_hostname() -> str: + if under_ci(): + result = [] + try: + r = requests.get("http://instance-data/latest/meta-data/instance-id") + if r.status_code == 200: + result.append(r.content.decode()) + r = requests.get("http://instance-data/latest/meta-data/public-hostname") + if r.status_code == 200: + result.append(r.content.decode()) + return ' '.join(result) + except ConnectionError: + pass + return '?' + else: + return '' + + +def log_environment(): + instance_id = ec2_instance_id_hostname() + if instance_id: + logging.info("EC2 Instance id: %s", instance_id) + pp = pprint.PrettyPrinter(indent=4) + logging.debug("Build environment: %s", pp.pformat(dict(os.environ))) + + +def chdir_to_script_directory(): # We need to be in the same directory than the script so the commands in the dockerfiles work as # expected. But the script can be invoked from a different path base = os.path.split(os.path.realpath(__file__))[0] os.chdir(base) - logging.getLogger().setLevel(logging.INFO) - def script_name() -> str: - return os.path.split(sys.argv[0])[1] +def script_name() -> str: + return os.path.split(sys.argv[0])[1] + +def main() -> int: + logging.getLogger().setLevel(logging.INFO) + logging.getLogger("requests").setLevel(logging.WARNING) logging.basicConfig(format='{}: %(asctime)-15s %(message)s'.format(script_name())) + logging.info("MXNet container based build tool.") + log_environment() + chdir_to_script_directory() + parser = argparse.ArgumentParser(description="""Utility for building and testing MXNet on docker containers""", epilog="") parser.add_argument("-p", "--platform", @@ -284,7 +364,7 @@ def script_name() -> str: default=1, type=int) - parser.add_argument("-c", "--cache", action="store_true", + parser.add_argument("--no-cache", action="store_true", help="Enable docker registry cache") parser.add_argument("command", @@ -297,8 +377,9 @@ def script_name() -> str: type=str) args = parser.parse_args() + def use_cache(): - return args.cache or under_ci() + return not args.no_cache or under_ci() command = list(chain(*args.command)) docker_binary = get_docker_binary(args.nvidiadocker) @@ -306,13 +387,14 @@ def use_cache(): num_docker_build_retires = args.docker_build_retries if args.list: - list_platforms() + print(list_platforms()) elif args.platform: platform = args.platform tag = get_docker_tag(platform=platform, registry=args.docker_registry) if use_cache(): load_docker_cache(tag=tag, docker_registry=args.docker_registry) - build_docker(platform, docker_binary, registry=args.docker_registry, num_retries=num_docker_build_retires) + build_docker(platform=platform, docker_binary=docker_binary, registry=args.docker_registry, + num_retries=num_docker_build_retires, use_cache=use_cache()) if args.build_only: logging.warning("Container was just built. Exiting due to build-only.") return 0 @@ -323,7 +405,8 @@ def use_cache(): local_ccache_dir=args.ccache_dir, interactive=args.interactive) elif args.print_docker_run: print(container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=shared_memory_size, - command=[], dry_run=True, docker_registry=args.docker_registry, local_ccache_dir=args.ccache_dir)) + command=[], dry_run=True, docker_registry=args.docker_registry, + local_ccache_dir=args.ccache_dir)) elif args.interactive: container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=shared_memory_size, command=command, docker_registry=args.docker_registry, @@ -346,15 +429,21 @@ def use_cache(): tag = get_docker_tag(platform=platform, registry=args.docker_registry) if use_cache(): load_docker_cache(tag=tag, docker_registry=args.docker_registry) - build_docker(platform, docker_binary, args.docker_registry, num_retries=num_docker_build_retires) + build_docker(platform, docker_binary, args.docker_registry, num_retries=args.docker_build_retries, + use_cache=use_cache()) if args.build_only: continue - build_platform = "build_{}".format(platform) - cmd = ["/work/mxnet/ci/docker/runtime_functions.sh", build_platform] + git_cleanup() shutil.rmtree(buildir(), ignore_errors=True) + build_platform = "build_{}".format(platform) + plat_buildir = os.path.abspath(os.path.join(get_mxnet_root(), '..', + "mxnet_{}".format(build_platform))) + if os.path.exists(plat_buildir): + logging.warning("{} already exists, skipping".format(plat_buildir)) + continue + command = ["/work/mxnet/ci/docker/runtime_functions.sh", build_platform] container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=shared_memory_size, - command=cmd, docker_registry=args.docker_registry, local_ccache_dir=args.ccache_dir) - plat_buildir = os.path.join(get_mxnet_root(), build_platform) + command=command, docker_registry=args.docker_registry, local_ccache_dir=args.ccache_dir) shutil.move(buildir(), plat_buildir) logging.info("Built files left in: %s", plat_buildir) @@ -383,7 +472,8 @@ def use_cache(): ./build.py -a - Builds for all platforms and leaves artifacts in build_ + Builds for all platforms and leaves artifacts in build_. **WARNING** it performs git + cleanup of the repo. """) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 1e38ec48e6ce..d3c1411b4383 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -567,6 +567,9 @@ build_ubuntu_gpu_cmake() { ninja -v } +build_ubuntu_blc() { + echo "pass" +} # Testing diff --git a/ci/docker_cache.py b/ci/docker_cache.py index 7a6d1106d38d..bebcb25fb8f8 100755 --- a/ci/docker_cache.py +++ b/ci/docker_cache.py @@ -30,6 +30,7 @@ import sys import subprocess import json +from typing import * import build as build_util @@ -59,7 +60,7 @@ def build_save_containers(platforms, registry, load_cache) -> int: return 1 if is_error else 0 -def _build_save_container(platform, registry, load_cache) -> str: +def _build_save_container(platform, registry, load_cache) -> Optional[str]: """ Build image for passed platform and upload the cache to the specified S3 bucket :param platform: Platform @@ -77,7 +78,7 @@ def _build_save_container(platform, registry, load_cache) -> str: logging.debug('Building %s as %s', platform, docker_tag) try: # Increase the number of retries for building the cache. - image_id = build_util.build_docker(docker_binary='docker', platform=platform, registry=registry, num_retries=10) + image_id = build_util.build_docker(docker_binary='docker', platform=platform, registry=registry, num_retries=10, use_cache=True) logging.info('Built %s as %s', docker_tag, image_id) # Push cache to registry From fdb4f47e3de2d09c2063be952cc1f38d15d75335 Mon Sep 17 00:00:00 2001 From: Pedro Larroy Date: Thu, 23 Aug 2018 15:10:37 +0200 Subject: [PATCH 2/2] Address CR comments --- ci/build.py | 112 ++++++++-------------------------------------------- ci/util.py | 71 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 95 deletions(-) diff --git a/ci/build.py b/ci/build.py index 07c001fdcdc4..f1a5e99e2d0e 100755 --- a/ci/build.py +++ b/ci/build.py @@ -42,64 +42,9 @@ import requests -def retry(target_exception, tries=4, delay_s=1, backoff=2): - """Retry calling the decorated function using an exponential backoff. - - http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ - original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry - - :param target_exception: the exception to check. may be a tuple of - exceptions to check - :type target_exception: Exception or tuple - :param tries: number of times to try (not retry) before giving up - :type tries: int - :param delay_s: initial delay between retries in seconds - :type delay_s: int - :param backoff: backoff multiplier e.g. value of 2 will double the delay - each retry - :type backoff: int - """ - import time - from functools import wraps - - def decorated_retry(f): - @wraps(f) - def f_retry(*args, **kwargs): - mtries, mdelay = tries, delay_s - while mtries > 1: - try: - return f(*args, **kwargs) - except target_exception as e: - logging.warning("Exception: %s, Retrying in %d seconds...", str(e), mdelay) - time.sleep(mdelay) - mtries -= 1 - mdelay *= backoff - return f(*args, **kwargs) - - return f_retry # true decorator - - return decorated_retry - - CCACHE_MAXSIZE = '500G' -# noinspection SyntaxError -def under_ci() -> bool: - """:return: True if we run in Jenkins.""" - return 'JOB_NAME' in os.environ - - -def git_cleanup() -> None: - """Clean repo and subrepos, update subrepos""" - logging.info("cleaning up repository") - with remember_cwd(): - os.chdir(get_mxnet_root()) - check_call(['git', 'clean', '-ffdx']) - check_call(['git', 'submodule', 'foreach', '--recursive', 'git', 'clean', '-ffdx']) - check_call(['git', 'submodule', 'update', '--recursive', '--init']) - - def get_dockerfiles_path(): return "docker" @@ -148,6 +93,10 @@ def build_docker(platform: str, docker_binary: str, registry: str, num_retries: # cache-from is needed so we use the cached images tagged from the remote via # docker pull see: docker_cache.load_docker_cache # + # This also prevents using local layers for caching: https://github.com/moby/moby/issues/33002 + # So to use local caching, we should omit the cache-from by using --no-dockerhub-cache argument to this + # script. + # # This doesn't work with multi head docker files. # cmd = [docker_binary, "build", @@ -253,7 +202,7 @@ def container_run(platform: str, logging.error("You can get into the container by adding the -i option") raise subprocess.CalledProcessError(ret, cmd) - return ret + return ret def list_platforms() -> str: @@ -273,24 +222,6 @@ def load_docker_cache(tag, docker_registry) -> None: logging.info('Distributed docker cache disabled') -def ec2_instance_id_hostname() -> str: - if under_ci(): - result = [] - try: - r = requests.get("http://instance-data/latest/meta-data/instance-id") - if r.status_code == 200: - result.append(r.content.decode()) - r = requests.get("http://instance-data/latest/meta-data/public-hostname") - if r.status_code == 200: - result.append(r.content.decode()) - return ' '.join(result) - except ConnectionError: - pass - return '?' - else: - return '' - - def log_environment(): instance_id = ec2_instance_id_hostname() if instance_id: @@ -299,13 +230,6 @@ def log_environment(): logging.debug("Build environment: %s", pp.pformat(dict(os.environ))) -def chdir_to_script_directory(): - # We need to be in the same directory than the script so the commands in the dockerfiles work as - # expected. But the script can be invoked from a different path - base = os.path.split(os.path.realpath(__file__))[0] - os.chdir(base) - - def script_name() -> str: return os.path.split(sys.argv[0])[1] @@ -364,8 +288,10 @@ def main() -> int: default=1, type=int) - parser.add_argument("--no-cache", action="store_true", - help="Enable docker registry cache") + parser.add_argument("-c", "--no-dockerhub-cache", action="store_true", + help="Disables use of --cache-from option on docker build, allowing docker" + " to use local layers for caching. If absent, we use the cache from dockerhub" + " which is the default.") parser.add_argument("command", help="command to run in the container", @@ -379,12 +305,10 @@ def main() -> int: args = parser.parse_args() def use_cache(): - return not args.no_cache or under_ci() + return not args.no_dockerhub_cache or under_ci() command = list(chain(*args.command)) docker_binary = get_docker_binary(args.nvidiadocker) - shared_memory_size = args.shared_memory_size - num_docker_build_retires = args.docker_build_retries if args.list: print(list_platforms()) @@ -394,21 +318,21 @@ def use_cache(): if use_cache(): load_docker_cache(tag=tag, docker_registry=args.docker_registry) build_docker(platform=platform, docker_binary=docker_binary, registry=args.docker_registry, - num_retries=num_docker_build_retires, use_cache=use_cache()) + num_retries=args.docker_build_retries, use_cache=use_cache()) if args.build_only: logging.warning("Container was just built. Exiting due to build-only.") return 0 if command: - container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=shared_memory_size, + container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry, local_ccache_dir=args.ccache_dir, interactive=args.interactive) elif args.print_docker_run: - print(container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=shared_memory_size, + print(container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=args.shared_memory_size, command=[], dry_run=True, docker_registry=args.docker_registry, local_ccache_dir=args.ccache_dir)) elif args.interactive: - container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=shared_memory_size, + container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry, local_ccache_dir=args.ccache_dir, interactive=args.interactive) @@ -417,7 +341,7 @@ def use_cache(): assert not args.interactive, "when running with -i must provide a command" cmd = ["/work/mxnet/ci/docker/runtime_functions.sh", "build_{}".format(platform)] logging.info("No command specified, trying default build: %s", ' '.join(cmd)) - container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=shared_memory_size, + container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=args.shared_memory_size, command=cmd, docker_registry=args.docker_registry, local_ccache_dir=args.ccache_dir) @@ -433,7 +357,6 @@ def use_cache(): use_cache=use_cache()) if args.build_only: continue - git_cleanup() shutil.rmtree(buildir(), ignore_errors=True) build_platform = "build_{}".format(platform) plat_buildir = os.path.abspath(os.path.join(get_mxnet_root(), '..', @@ -442,7 +365,7 @@ def use_cache(): logging.warning("{} already exists, skipping".format(plat_buildir)) continue command = ["/work/mxnet/ci/docker/runtime_functions.sh", build_platform] - container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=shared_memory_size, + container_run(platform=platform, docker_binary=docker_binary, shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry, local_ccache_dir=args.ccache_dir) shutil.move(buildir(), plat_buildir) logging.info("Built files left in: %s", plat_buildir) @@ -472,8 +395,7 @@ def use_cache(): ./build.py -a - Builds for all platforms and leaves artifacts in build_. **WARNING** it performs git - cleanup of the repo. + Builds for all platforms and leaves artifacts in build_. """) diff --git a/ci/util.py b/ci/util.py index 22631f30435f..98605bedf765 100644 --- a/ci/util.py +++ b/ci/util.py @@ -17,6 +17,7 @@ import os import contextlib +import requests def get_mxnet_root() -> str: curpath = os.path.abspath(os.path.dirname(__file__)) @@ -41,3 +42,73 @@ def remember_cwd(): finally: os.chdir(curdir) +def retry(target_exception, tries=4, delay_s=1, backoff=2): + """Retry calling the decorated function using an exponential backoff. + + http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ + original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry + + :param target_exception: the exception to check. may be a tuple of + exceptions to check + :type target_exception: Exception or tuple + :param tries: number of times to try (not retry) before giving up + :type tries: int + :param delay_s: initial delay between retries in seconds + :type delay_s: int + :param backoff: backoff multiplier e.g. value of 2 will double the delay + each retry + :type backoff: int + """ + import time + from functools import wraps + + def decorated_retry(f): + @wraps(f) + def f_retry(*args, **kwargs): + mtries, mdelay = tries, delay_s + while mtries > 1: + try: + return f(*args, **kwargs) + except target_exception as e: + logging.warning("Exception: %s, Retrying in %d seconds...", str(e), mdelay) + time.sleep(mdelay) + mtries -= 1 + mdelay *= backoff + return f(*args, **kwargs) + + return f_retry # true decorator + + return decorated_retry + + +# noinspection SyntaxError +def under_ci() -> bool: + """:return: True if we run in Jenkins.""" + return 'JOB_NAME' in os.environ + + +def ec2_instance_id_hostname() -> str: + if under_ci(): + result = [] + try: + r = requests.get("http://instance-data/latest/meta-data/instance-id") + if r.status_code == 200: + result.append(r.content.decode()) + r = requests.get("http://instance-data/latest/meta-data/public-hostname") + if r.status_code == 200: + result.append(r.content.decode()) + return ' '.join(result) + except ConnectionError: + pass + return '?' + else: + return '' + + +def chdir_to_script_directory(): + # We need to be in the same directory than the script so the commands in the dockerfiles work as + # expected. But the script can be invoked from a different path + base = os.path.split(os.path.realpath(__file__))[0] + os.chdir(base) + +