Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Argument caching #442

Merged
merged 6 commits into from
Apr 18, 2016
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
limit argument cache size
  • Loading branch information
kmike committed Apr 15, 2016
commit 37e4bf4f8a9e4701d64a8b2ded6fe5564b87d66a
50 changes: 48 additions & 2 deletions splash/argument_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@
from __future__ import absolute_import
import json
import hashlib
from collections import OrderedDict


class ArgumentCache(object):
"""
>>> cache = ArgumentCache()
>>> "foo" in cache
False
>>> cache['foo']
Traceback (most recent call last):
...
KeyError: 'foo'
>>> len(cache)
0
>>> key = cache.add("Hello, world!")
Expand Down Expand Up @@ -38,16 +43,57 @@ class ArgumentCache(object):
>>> cache.clear()
>>> len(cache)
0

Size of ArgumentCache can be limited:

>>> cache = ArgumentCache(0)
Traceback (most recent call last):
...
ValueError: maxsize must be greater than 0
>>> cache = ArgumentCache(2) # limit it to 2 elements
>>> cache.add_many(['value1', 'value2'])
['daf626c4ebd6bdd697e043111454304e5fb1459e', '849988af22dbd04d3e353caf77f9d81241ca9ee2']
>>> len(cache)
2
>>> cache.add("Hello, world!")
'bea2c9d7fd040292e0424938af39f7d6334e8d8a'
>>> len(cache)
2
>>> cache["bea2c9d7fd040292e0424938af39f7d6334e8d8a"]
'Hello, world!'
>>> cache['849988af22dbd04d3e353caf77f9d81241ca9ee2']
'value2'
>>> cache['daf626c4ebd6bdd697e043111454304e5fb1459e']
Traceback (most recent call last):
...
KeyError: 'daf626c4ebd6bdd697e043111454304e5fb1459e'
>>> cache.add("foo")
'd465e627f9946f2fa0d2dc0fc04e5385bc6cd46d'
>>> len(cache)
2
>>> 'bea2c9d7fd040292e0424938af39f7d6334e8d8a' in cache
False
"""
def __init__(self):
self._values = {} # TODO: LRU cache
def __init__(self, maxsize=None):
    """Create an empty cache holding at most ``maxsize`` entries.

    :param maxsize: maximum number of entries to keep; ``None`` (the
        default) means the cache size is not limited.
    :raises ValueError: if ``maxsize`` is zero or negative.
    """
    # An infinite limit lets the size comparison in ``add`` work without
    # special-casing the "unlimited" configuration.
    limit = float("+inf") if maxsize is None else maxsize
    if limit <= 0:
        raise ValueError("maxsize must be greater than 0")
    # Insertion order doubles as recency order: oldest entries come first.
    self._values = OrderedDict()
    self.maxsize = limit

def add(self, value):
    """Store ``value`` in the cache and return the key it is stored under.

    The key is computed by ``self.get_key(value)``. Adding a value marks
    it as the most recently used entry. When a new key would exceed
    ``self.maxsize``, the oldest entries are evicted first.
    """
    key = self.get_key(value)
    store = self._values
    if key not in store:
        # New key: drop entries from the old end until there is room.
        while len(store) >= self.maxsize:
            store.popitem(last=False)
    else:
        # Known key: remove it so the re-insert below moves it to the
        # most-recently-used end.
        del store[key]
    store[key] = value
    return key

def __getitem__(self, key):
    """Return the value stored under ``key`` and mark it as recently used.

    :raises KeyError: if ``key`` is not in the cache; the cache is left
        unchanged in that case.
    """
    # Pop-and-reinsert moves the entry to the most-recently-used end with
    # a single lookup. Unlike ``OrderedDict.move_to_end`` (Python 3.2+
    # only) it also works on Python 2, and it mirrors the
    # delete-and-reinsert idiom used by ``add``.
    value = self._values.pop(key)
    self._values[key] = value
    return value

def __contains__(self, key):
Expand Down
3 changes: 3 additions & 0 deletions splash/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@
# pool options
SLOTS = 50

# argument cache option
ARGUMENT_CACHE_MAX_ENTRIES = 500

# security options
ALLOWED_SCHEMES = ['http', 'https', 'data', 'ftp', 'sftp', 'ws', 'wss']
JS_CROSS_DOMAIN_ENABLED = False
Expand Down
65 changes: 40 additions & 25 deletions splash/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,17 @@ def _format_error(self, code, exc):
return err


cache = ArgumentCache()


class BaseRenderResource(_ValidatingResource):

isLeaf = True
content_type = "text/html; charset=utf-8"

def __init__(self, pool, max_timeout):
def __init__(self, pool, max_timeout, argument_cache):
Resource.__init__(self)
self.pool = pool
self.js_profiles_path = self.pool.js_profiles_path
self.max_timeout = max_timeout
self.argument_cache = argument_cache

def render_GET(self, request):
#log.msg("%s %s %s %s" % (id(request), request.method, request.path, request.args))
Expand All @@ -101,18 +99,18 @@ def render_GET(self, request):

# process argument cache
original_options = render_options.data.copy()
expired_args = render_options.get_expired_args(cache)
expired_args = render_options.get_expired_args(self.argument_cache)
if expired_args:
error = self._write_expired_args(request, expired_args)
self._log_stats(request, original_options, error)
return b"\n"

saved_args = render_options.save_args_to_cache(cache)
saved_args = render_options.save_args_to_cache(self.argument_cache)
if saved_args:
value = ';'.join("{}={}".format(name, value)
for name, value in saved_args)
request.setHeader(b'X-Splash-Saved-Arguments', value.encode('utf8'))
render_options.load_cached_args(cache)
render_options.load_cached_args(self.argument_cache)

# check arguments before starting the render
render_options.get_filters(self.pool)
Expand Down Expand Up @@ -284,8 +282,10 @@ class ExecuteLuaScriptResource(BaseRenderResource):
def __init__(self, pool, sandboxed,
lua_package_path,
lua_sandbox_allowed_modules,
max_timeout):
BaseRenderResource.__init__(self, pool, max_timeout)
max_timeout,
argument_cache,
):
BaseRenderResource.__init__(self, pool, max_timeout, argument_cache)
self.sandboxed = sandboxed
self.lua_package_path = lua_package_path
self.lua_sandbox_allowed_modules = lua_sandbox_allowed_modules
Expand Down Expand Up @@ -341,8 +341,9 @@ def _get_render(self, request, options):
class DebugResource(Resource):
isLeaf = True

def __init__(self, pool, warn=False):
def __init__(self, pool, argument_cache, warn=False):
Resource.__init__(self)
self.argument_cache = argument_cache
self.pool = pool
self.warn = warn

Expand All @@ -354,13 +355,14 @@ def render_GET(self, request):
"qsize": len(self.pool.queue.pending),
"maxrss": resource.getrusage(resource.RUSAGE_SELF).ru_maxrss,
"fds": get_num_fds(),
"argcache": len(self.argument_cache)
}
if self.warn:
info['WARNING'] = "/debug endpoint is deprecated. " \
"Please use /_debug instead."
# info['leaks'] = get_leaks()

return (json.dumps(info)).encode('utf-8')
return (json.dumps(info, sort_keys=True)).encode('utf-8')

def get_repr(self, render):
if hasattr(render, 'url'):
Expand All @@ -372,13 +374,20 @@ class ClearCachesResource(Resource):
isLeaf = True
content_type = "application/json"

def __init__(self, argument_cache):
    """Set up the resource with the argument cache it operates on.

    :param argument_cache: cache object kept on ``self.argument_cache``;
        ``render_POST`` reads its length and calls its ``clear()`` method.
    """
    # Explicit base-class call, as done by the other resources in this file.
    Resource.__init__(self)
    self.argument_cache = argument_cache

def render_POST(self, request):
argcache_size = len(self.argument_cache)
self.argument_cache.clear()
clear_caches()
unreachable = gc.collect()
return json.dumps({
"status": "ok",
"pyobjects_collected": unreachable
}).encode('utf-8')
"pyobjects_collected": unreachable,
"cached_args_removed": argcache_size,
}, sort_keys=True).encode('utf-8')


class PingResource(Resource):
Expand All @@ -389,7 +398,7 @@ def render_GET(self, request):
return (json.dumps({
"status": "ok",
"maxrss": get_ru_maxrss(),
})).encode('utf-8')
}, sort_keys=True)).encode('utf-8')



Expand Down Expand Up @@ -554,30 +563,36 @@ class Root(Resource):
def __init__(self, pool, ui_enabled, lua_enabled, lua_sandbox_enabled,
lua_package_path,
lua_sandbox_allowed_modules,
max_timeout):
max_timeout,
argument_cache_max_entries,
):
Resource.__init__(self)
self.argument_cache = ArgumentCache(argument_cache_max_entries)
self.ui_enabled = ui_enabled
self.lua_enabled = lua_enabled
self.putChild(b"render.html", RenderHtmlResource(pool, max_timeout))
self.putChild(b"render.png", RenderPngResource(pool, max_timeout))
self.putChild(b"render.jpeg", RenderJpegResource(pool, max_timeout))
self.putChild(b"render.json", RenderJsonResource(pool, max_timeout))
self.putChild(b"render.har", RenderHarResource(pool, max_timeout))

self.putChild(b"_debug", DebugResource(pool))
self.putChild(b"_gc", ClearCachesResource())

_args = pool, max_timeout, self.argument_cache
self.putChild(b"render.html", RenderHtmlResource(*_args))
self.putChild(b"render.png", RenderPngResource(*_args))
self.putChild(b"render.jpeg", RenderJpegResource(*_args))
self.putChild(b"render.json", RenderJsonResource(*_args))
self.putChild(b"render.har", RenderHarResource(*_args))

self.putChild(b"_debug", DebugResource(pool, self.argument_cache))
self.putChild(b"_gc", ClearCachesResource(self.argument_cache))
self.putChild(b"_ping", PingResource())

# backwards compatibility
self.putChild(b"debug", DebugResource(pool, warn=True))
self.putChild(b"debug", DebugResource(pool, self.argument_cache, warn=True))

if self.lua_enabled and ExecuteLuaScriptResource is not None:
self.putChild(b"execute", ExecuteLuaScriptResource(
pool=pool,
sandboxed=lua_sandbox_enabled,
lua_package_path=lua_package_path,
lua_sandbox_allowed_modules=lua_sandbox_allowed_modules,
max_timeout=max_timeout
max_timeout=max_timeout,
argument_cache=self.argument_cache,
))

if self.ui_enabled:
Expand Down
17 changes: 14 additions & 3 deletions splash/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def parse_opts(jupyter=False, argv=sys.argv):
help="disable web UI")
op.add_option("--disable-lua", action="store_true", default=False,
help="disable Lua scripting")
op.add_option("--argument-cache-max-entries", type="int",
default=defaults.ARGUMENT_CACHE_MAX_ENTRIES,
help="maximum number of entries in arguments cache (default: %default)")

opts, args = op.parse_args(argv)

Expand All @@ -84,11 +87,11 @@ def parse_opts(jupyter=False, argv=sys.argv):
opts.port = None
opts.slots = None
opts.max_timeout = None
opts.argument_cache_max_entries = None

return opts, args



def start_logging(opts):
import twisted
from twisted.python import log
Expand Down Expand Up @@ -168,6 +171,7 @@ def splash_server(portnum, slots, network_manager_factory, max_timeout,
lua_sandbox_enabled=True,
lua_package_path="",
lua_sandbox_allowed_modules=(),
argument_cache_max_entries=None,
verbosity=None):
from twisted.internet import reactor
from twisted.web.server import Site
Expand All @@ -182,6 +186,9 @@ def splash_server(portnum, slots, network_manager_factory, max_timeout,
slots = defaults.SLOTS if slots is None else slots
log.msg("slots=%s" % slots)

if argument_cache_max_entries:
log.msg("argument_cache_max_entries=%s" % argument_cache_max_entries)

pool = RenderPool(
slots=slots,
network_manager_factory=network_manager_factory,
Expand Down Expand Up @@ -211,7 +218,8 @@ def splash_server(portnum, slots, network_manager_factory, max_timeout,
lua_sandbox_enabled=lua_sandbox_enabled,
lua_package_path=lua_package_path,
lua_sandbox_allowed_modules=lua_sandbox_allowed_modules,
max_timeout=max_timeout
max_timeout=max_timeout,
argument_cache_max_entries=argument_cache_max_entries,
)
factory = Site(root)
reactor.listenTCP(portnum, factory)
Expand Down Expand Up @@ -255,6 +263,7 @@ def default_splash_server(portnum, max_timeout, slots=None,
lua_sandbox_enabled=True,
lua_package_path="",
lua_sandbox_allowed_modules=(),
argument_cache_max_entries=None,
verbosity=None,
server_factory=splash_server):
from splash import network_manager
Expand All @@ -278,7 +287,8 @@ def default_splash_server(portnum, max_timeout, slots=None,
lua_package_path=lua_package_path,
lua_sandbox_allowed_modules=lua_sandbox_allowed_modules,
verbosity=verbosity,
max_timeout=max_timeout
max_timeout=max_timeout,
argument_cache_max_entries=argument_cache_max_entries,
)


Expand Down Expand Up @@ -358,6 +368,7 @@ def main(jupyter=False, argv=sys.argv, server_factory=splash_server):
lua_sandbox_allowed_modules=opts.lua_sandbox_allowed_modules.split(";"),
verbosity=opts.verbosity,
max_timeout=opts.max_timeout,
argument_cache_max_entries=opts.argument_cache_max_entries,
server_factory=server_factory,
)
signal.signal(signal.SIGUSR1, lambda s, f: traceback.print_stack(f))
Expand Down