Skip to content

Commit

Permalink
bpo-29505: Fuzz json module, enforce size limit on int(x) fuzz (pytho…
Browse files Browse the repository at this point in the history
…nGH-13991)

* bpo-29505: Enable fuzz testing of the json module, enforce size limit on int(x) fuzz and json input size to avoid timeouts.

Contributed by Ammar Askar for Google.
  • Loading branch information
ammaraskar authored and gpshead committed Jun 12, 2019
1 parent 405f648 commit a6e190e
Show file tree
Hide file tree
Showing 10 changed files with 171 additions and 1 deletion.
10 changes: 10 additions & 0 deletions Modules/_xxtestfuzz/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ And invoke it from ``LLVMFuzzerTestOneInput``::
``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in
``fuzz_tests.txt`` run separately.

Seed data (corpus) for the test can be provided in a subfolder called
``<test_name>_corpus`` such as ``fuzz_json_loads_corpus``. A wide variety
of good input samples allows the fuzzer to more easily explore a diverse
set of paths and provides a better base to find buggy input from.

Dictionaries of tokens (see oss-fuzz documentation for more details) can
be placed in the ``dictionaries`` folder with the name of the test.
For example, ``dictionaries/fuzz_json_loads.dict`` contains JSON tokens
to guide the fuzzer.

What makes a good fuzz test
---------------------------

Expand Down
40 changes: 40 additions & 0 deletions Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"0"
",0"
":0"
"0:"
"-1.2e+3"

"true"
"false"
"null"

"\"\""
",\"\""
":\"\""
"\"\":"

"{}"
",{}"
":{}"
"{\"\":0}"
"{{}}"

"[]"
",[]"
":[]"
"[0]"
"[[]]"

"''"
"\\"
"\\b"
"\\f"
"\\n"
"\\r"
"\\t"
"\\u0000"
"\\x00"
"\\0"
"\\uD800\\uDC00"
"\\uDBFF\\uDFFF"

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
58 changes: 58 additions & 0 deletions Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
[
"JSON Test Pattern pass1",
{"object with 1 member":["array with 1 element"]},
{},
[],
-42,
true,
false,
null,
{
"integer": 1234567890,
"real": -9876.543210,
"e": 0.123456789e-12,
"E": 1.234567890E+34,
"": 23456789012E66,
"zero": 0,
"one": 1,
"space": " ",
"quote": "\"",
"backslash": "\\",
"controls": "\b\f\n\r\t",
"slash": "/ & \/",
"alpha": "abcdefghijklmnopqrstuvwyz",
"ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
"digit": "0123456789",
"0123456789": "digit",
"special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
"hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
"true": true,
"false": false,
"null": null,
"array":[ ],
"object":{ },
"address": "50 St. James Street",
"url": "http://www.JSON.org/",
"comment": "// /* <!-- --",
"# -- --> */": " ",
" s p a c e d " :[1,2 , 3

,

4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7],
"jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
"quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
"\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
: "A key can be any string"
},
0.5 ,98.6
,
99.44
,

1066,
1e1,
0.1e1,
1e-1,
1e00,2e+00,2e-00
,"rosebud"]
1 change: 1 addition & 0 deletions Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
6 changes: 6 additions & 0 deletions Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"JSON Test Pattern pass3": {
"The outermost value": "must be an object or array.",
"In this test": "It is an object."
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[1, 2, 3, "abcd", "xyz"]
1 change: 1 addition & 0 deletions Modules/_xxtestfuzz/fuzz_tests.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
fuzz_builtin_float
fuzz_builtin_int
fuzz_builtin_unicode
fuzz_json_loads
53 changes: 52 additions & 1 deletion Modules/_xxtestfuzz/fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,15 @@ static int fuzz_builtin_float(const char* data, size_t size) {
return 0;
}

/* Largest input, in bytes, that fuzz_builtin_int will process
   (0x10000 = 64 KiB); longer inputs are skipped. */
#define MAX_INT_TEST_SIZE 0x10000

/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
static int fuzz_builtin_int(const char* data, size_t size) {
/* Ignore test cases with very long ints to avoid timeouts;
int("9" * 1000000) is not a very interesting test case. */
if (size > MAX_INT_TEST_SIZE) {
return 0;
}
/* Pick a random valid base. (When the fuzzed function takes extra
parameters, it's somewhat normal to hash the input to generate those
parameters. We want to exercise all code paths, so we do so here.) */
Expand Down Expand Up @@ -72,6 +79,42 @@ static int fuzz_builtin_unicode(const char* data, size_t size) {
return 0;
}

/* Largest input, in bytes, that fuzz_json_loads will process
   (0x10000 = 64 KiB); longer inputs are skipped. */
#define MAX_JSON_TEST_SIZE 0x10000

/* Callable that fuzz_json_loads invokes on each input: the "loads" attribute
   of the json module. Stays NULL until LLVMFuzzerTestOneInput looks it up.
   Initialized in LLVMFuzzerTestOneInput */
PyObject* json_loads_method = NULL;
/* Fuzz json.loads(x) */
static int fuzz_json_loads(const char* data, size_t size) {
/* Since python supports arbitrarily large ints in JSON,
long inputs can lead to timeouts on boring inputs like
`json.loads("9" * 100000)` */
if (size > MAX_JSON_TEST_SIZE) {
return 0;
}
PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
if (input_bytes == NULL) {
return 0;
}
PyObject* parsed = PyObject_CallFunctionObjArgs(json_loads_method, input_bytes, NULL);
/* Ignore ValueError as the fuzzer will more than likely
generate some invalid json and values */
if (parsed == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
PyErr_Clear();
}
/* Ignore RecursionError as the fuzzer generates long sequences of
arrays such as `[[[...` */
if (parsed == NULL && PyErr_ExceptionMatches(PyExc_RecursionError)) {
PyErr_Clear();
}
/* Ignore unicode errors, invalid byte sequences are common */
if (parsed == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
PyErr_Clear();
}
Py_DECREF(input_bytes);
Py_XDECREF(parsed);
return 0;
}

/* Run fuzzer and abort on failure. */
static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
int rv = fuzzer((const char*) data, size);
Expand All @@ -88,7 +131,6 @@ static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char*
/* Unconditionally returns 1. CPython produces many leak warnings, so this
   is presumably the hook that tells the leak sanitizer to stay off
   (NOTE(review): confirm against the sanitizer's interface). */
int __lsan_is_turned_off(void)
{
    return 1;
}

wchar_t wide_program_name[NAME_MAX];

int LLVMFuzzerInitialize(int *argc, char ***argv) {
wchar_t* wide_program_name = Py_DecodeLocale(*argv[0], NULL);
Expand All @@ -110,6 +152,12 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
initialize CPython ourselves on the first run. */
Py_InitializeEx(0);
}
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
    /* Look up json.loads once and cache it in json_loads_method. */
    if (json_loads_method == NULL) {
        PyObject* json_module = PyImport_ImportModule("json");
        if (json_module != NULL) {
            json_loads_method = PyObject_GetAttrString(json_module, "loads");
            /* Only the attribute is kept; release our module reference. */
            Py_DECREF(json_module);
        }
        if (json_loads_method == NULL) {
            /* Either the import or the attribute lookup failed. The json
               fuzz test cannot run without the callable, so report the
               pending exception and stop instead of dereferencing NULL. */
            PyErr_Print();
            Py_FatalError("fuzzer: could not look up json.loads");
        }
    }
#endif

int rv = 0;

Expand All @@ -121,6 +169,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
rv |= _run_fuzz(data, size, fuzz_json_loads);
#endif
return rv;
}

0 comments on commit a6e190e

Please sign in to comment.