common/json.c: Check that JSMN result is well-formed.

xref: https://lists.ozlabs.org/pipermail/c-lightning/2020-June/000188.html

Changelog-Fixed: Reject some bad JSON at parsing.
This commit is contained in:
ZmnSCPxj jxPCSnmZ 2020-06-09 01:23:54 +08:00 committed by Rusty Russell
parent 4302afd9a5
commit e8936f9d23
2 changed files with 238 additions and 4 deletions

View File

@ -301,6 +301,235 @@ const jsmntok_t *json_get_arr(const jsmntok_t tok[], size_t index)
return NULL;
}
/*-----------------------------------------------------------------------------
JSMN Result Validation Starts
-----------------------------------------------------------------------------*/
/*~ LIBJSMN is a fast, small JSON parsing library.
*
* "Fast, small" means it does not, in fact, do a
* lot of checking for invalid JSON.
*
* For example, by itself it would accept the strings
* `{"1" "2" "3" "4"}` and `["key": 1 2 3 4]` as valid.
* Obviously those are not in any way valid JSON.
*
* This part of the code performs some filtering so
* that at least some of the invalid JSON that
* LIBJSMN accepts, will be rejected by
* json_parse_input.
*/
/*~ These functions are used in JSMN validation.
*
* The calling convention is that the "current" token
* is passed in as the first argument, and after the
* validator, is returned from the function.
*
* p = validate_jsmn_datum(p, end, valid);
*
* The reason has to do with typical C ABIs.
* Usually, the first few arguments are passed in via
* register, and the return value is also returned
* via register.
* This calling convention generally ensures that
* the current token pointer `p` is always in a
* register and is never forced into memory by the
* compiler.
*
* These functions are pre-declared here as they
* are interrecursive.
* Note that despite the recursion, `p` is only ever
* advanced, and there is only ever one `p` value,
* thus the overall algorithm is strict O(n)
* (*not* amortized) in time.
* The recursion does mean the algorithm is O(d)
* in memory (specifically stack frames), where d
* is the nestedness of objects in the input.
* This may become an issue later if we are in a
* stack-limited environment, such as if we actually
* went and used threads.
*/
/* Validate a *single* datum. */
static const jsmntok_t *
validate_jsmn_datum(const jsmntok_t *p,
const jsmntok_t *end,
bool *valid);
/*~ Validate a key-value pair.
*
* In JSMN, objects are not dictionaries.
* Instead, they are a sequence of datums.
*
* In fact, objects and arrays in JSMN are "the same",
* they only differ in delimiter characters.
*
* Of course, in "real" JSON, an object is a dictionary
* of key-value pairs.
*
* So what JSMN does is that the syntax "key": "value"
* is considered a *single* datum, a string "key"
* that contains a value "value".
*
* Indeed, JSMN accepts `["key": "value"]` as well as
* `{"item1", "item2"}`.
* The entire point of the validate_jsmn_result function
* is to reject such improper arrays and objects.
*/
static const jsmntok_t *
validate_jsmn_keyvalue(const jsmntok_t *p,
const jsmntok_t *end,
bool *valid);
static const jsmntok_t *
validate_jsmn_datum(const jsmntok_t *p,
const jsmntok_t *end,
bool *valid)
{
int i;
int sz;
if (p >= end) {
*valid = false;
return p;
}
switch (p->type) {
case JSMN_UNDEFINED:
case JSMN_STRING:
case JSMN_PRIMITIVE:
/* These types should not have sub-datums. */
if (p->size != 0)
*valid = false;
else
++p;
break;
case JSMN_ARRAY:
/* Save the array size; we will advance p. */
sz = p->size;
++p;
for (i = 0; i < sz; ++i) {
/* Arrays should only contain standard JSON datums. */
p = validate_jsmn_datum(p, end, valid);
if (!*valid)
break;
}
break;
case JSMN_OBJECT:
/* Save the object size; we will advance p. */
sz = p->size;
++p;
for (i = 0; i < sz; ++i) {
/* Objects should only contain key-value pairs. */
p = validate_jsmn_keyvalue(p, end, valid);
if (!*valid)
break;
}
break;
default:
*valid = false;
break;
}
return p;
}
/* Key-value pairs *must* be strings with size 1. */
static inline const jsmntok_t *
validate_jsmn_keyvalue(const jsmntok_t *p,
const jsmntok_t *end,
bool *valid)
{
if (p >= end) {
*valid = false;
return p;
}
/* Check key.
*
* JSMN parses the syntax `"key": "value"` as a
* JSMN_STRING of size 1, containing the value
* datum as a sub-datum.
*
* Thus, keys in JSON objects are really strings
* that "contain" the value, thus we check if
* the size is 1.
*
* JSMN supports a non-standard syntax such as
* `"key": 1 2 3 4`, which it considers as a
* string object that contains a sequence of
* sub-datums 1 2 3 4.
* The check below that p->size == 1 also
* incidentally rejects that non-standard
* JSON.
*/
if (p->type != JSMN_STRING || p->size != 1) {
*valid = false;
return p;
}
++p;
return validate_jsmn_datum(p, end, valid);
}
/** validate_jsmn_parse_output
*
* @brief Validates the result of jsmn_parse.
*
* @desc LIBJMSN is a small fast library, not a
* comprehensive library.
*
* This simply means that LIBJSMN will accept a
* *lot* of very strange text that is technically
* not JSON.
*
* For example, LIBJSMN would accept the strings
* `{"1" "2" "3" "4"}` and `["key": 1 2 3 4]` as valid.
*
* This can lead to strange sequences of jsmntok_t
* objects.
* Unfortunately, most of our code assumes that
* the data fed into our JSON-RPC interface is
* valid JSON, and in particular is not invalid
* JSON that tickles LIBJSMN into emitting
* strange sequences of `jsmntok_t`.
*
* This function detects such possible problems
* and returns false if such an issue is found.
* If so, it is probably unsafe to pass the
* `jsmntok_t` generated by LIBJSMN to any other
* parts of our code.
*
* @param p - The first jsmntok_t token to process.
* This function does not assume that semantically
* only one JSON datum is processed; it does expect
* a sequence of complete JSON datums (which is
* what LIBJSMN *should* output).
* @param end - One past the end of jsmntok_t.
* Basically, this function is assured to read tokens
* starting at p up to end - 1.
* If p >= end, this will not validate anything and
* trivially return true.
*
* @return true if there appears to be no problem
* with the jsmntok_t sequence outputted by
* `jsmn_parse`, false otherwise.
*/
static bool
validate_jsmn_parse_output(const jsmntok_t *p, const jsmntok_t *end)
{
bool valid = true;
while (p < end && valid)
p = validate_jsmn_datum(p, end, &valid);
return valid;
}
/*-----------------------------------------------------------------------------
JSMN Result Validation Ends
-----------------------------------------------------------------------------*/
jsmntok_t *json_parse_input(const tal_t *ctx,
const char *input, int len, bool *valid)
{
@ -338,7 +567,7 @@ again:
ret = json_next(toks) - toks;
/* Cut to length and return. */
*valid = true;
*valid = validate_jsmn_parse_output(toks, toks + ret);
tal_resize(&toks, ret + 1);
/* Make sure last one is always referenceable. */
toks[ret].type = -1;

View File

@ -122,20 +122,25 @@ static void test_json_tok_size(void)
assert(toks[0].size == 2);
assert(toks[1].size == 2);
buf = "{\"e1\" : {\"e2\", \"e3\"}}";
buf = "{\"e1\" : {\"e2\": 2, \"e3\": 3}}";
toks = json_parse_input(tmpctx, buf, strlen(buf), &ok);
assert(ok);
/* size only counts *direct* children */
assert(toks[0].size == 1);
assert(toks[2].size == 2);
buf = "{\"e1\" : {\"e2\", \"e3\"}, \"e4\" : {\"e5\", \"e6\"}}";
buf = "{\"e1\" : {\"e2\": 2, \"e3\": 3}, \"e4\" : {\"e5\": 5, \"e6\": 6}}";
toks = json_parse_input(tmpctx, buf, strlen(buf), &ok);
assert(ok);
/* size only counts *direct* children */
assert(toks[0].size == 2);
assert(toks[2].size == 2);
assert(toks[6].size == 2);
assert(toks[8].size == 2);
/* This should *not* parse! (used to give toks[0]->size == 3!) */
buf = "{ \"\" \"\" \"\" }";
toks = json_parse_input(tmpctx, buf, strlen(buf), &ok);
assert(!ok);
/* This should *not* parse! (used to give toks[0]->size == 2!) */
buf = "{ 'satoshi', '546' }";