diff --git a/common/json.c b/common/json.c index 30ffe04a2..949132647 100644 --- a/common/json.c +++ b/common/json.c @@ -301,6 +301,235 @@ const jsmntok_t *json_get_arr(const jsmntok_t tok[], size_t index) return NULL; } +/*----------------------------------------------------------------------------- +JSMN Result Validation Starts +-----------------------------------------------------------------------------*/ +/*~ LIBJSMN is a fast, small JSON parsing library. + * + * "Fast, small" means it does not, in fact, do a + * lot of checking for invalid JSON. + * + * For example, by itself it would accept the strings + * `{"1" "2" "3" "4"}` and `["key": 1 2 3 4]` as valid. + * Obviously those are not in any way valid JSON. + * + * This part of the code performs some filtering so + * that at least some of the invalid JSON that + * LIBJSMN accepts, will be rejected by + * json_parse_input. + */ + +/*~ These functions are used in JSMN validation. + * + * The calling convention is that the "current" token + * is passed in as the first argument, and after the + * validator, is returned from the function. + * + * p = validate_jsmn_datum(p, end, valid); + * + * The reason has to do with typical C ABIs. + * Usually, the first few arguments are passed in via + * register, and the return value is also returned + * via register. + * This calling convention generally ensures that + * the current token pointer `p` is always in a + * register and is never forced into memory by the + * compiler. + * + * These functions are pre-declared here as they + * are interrecursive. + * Note that despite the recursion, `p` is only ever + * advanced, and there is only ever one `p` value, + * thus the overall algorithm is strict O(n) + * (*not* amortized) in time. + * The recursion does mean the algorithm is O(d) + * in memory (specifically stack frames), where d + * is the nestedness of objects in the input. + * This may become an issue later if we are in a + * stack-limited environment, such as if we actually + * went and used threads. + */ +/* Validate a *single* datum. */ +static const jsmntok_t * +validate_jsmn_datum(const jsmntok_t *p, + const jsmntok_t *end, + bool *valid); +/*~ Validate a key-value pair. + * + * In JSMN, objects are not dictionaries. + * Instead, they are a sequence of datums. + * + * In fact, objects and arrays in JSMN are "the same", + * they only differ in delimiter characters. + * + * Of course, in "real" JSON, an object is a dictionary + * of key-value pairs. + * + * So what JSMN does is that the syntax "key": "value" + * is considered a *single* datum, a string "key" + * that contains a value "value". + * + * Indeed, JSMN accepts `["key": "value"]` as well as + * `{"item1", "item2"}`. + * The entire point of the validate_jsmn_result function + * is to reject such improper arrays and objects. + */ +static const jsmntok_t * +validate_jsmn_keyvalue(const jsmntok_t *p, + const jsmntok_t *end, + bool *valid); + +static const jsmntok_t * +validate_jsmn_datum(const jsmntok_t *p, + const jsmntok_t *end, + bool *valid) +{ + int i; + int sz; + + if (p >= end) { + *valid = false; + return p; + } + + switch (p->type) { + case JSMN_UNDEFINED: + case JSMN_STRING: + case JSMN_PRIMITIVE: + /* These types should not have sub-datums. */ + if (p->size != 0) + *valid = false; + else + ++p; + break; + + case JSMN_ARRAY: + /* Save the array size; we will advance p. */ + sz = p->size; + ++p; + for (i = 0; i < sz; ++i) { + /* Arrays should only contain standard JSON datums. */ + p = validate_jsmn_datum(p, end, valid); + if (!*valid) + break; + } + break; + + case JSMN_OBJECT: + /* Save the object size; we will advance p. */ + sz = p->size; + ++p; + for (i = 0; i < sz; ++i) { + /* Objects should only contain key-value pairs. */ + p = validate_jsmn_keyvalue(p, end, valid); + if (!*valid) + break; + } + break; + + default: + *valid = false; + break; + } + + return p; +} +/* Key-value pairs *must* be strings with size 1. */ +static inline const jsmntok_t * +validate_jsmn_keyvalue(const jsmntok_t *p, + const jsmntok_t *end, + bool *valid) +{ + if (p >= end) { + *valid = false; + return p; + } + + /* Check key. + * + * JSMN parses the syntax `"key": "value"` as a + * JSMN_STRING of size 1, containing the value + * datum as a sub-datum. + * + * Thus, keys in JSON objects are really strings + * that "contain" the value, thus we check if + * the size is 1. + * + * JSMN supports a non-standard syntax such as + * `"key": 1 2 3 4`, which it considers as a + * string object that contains a sequence of + * sub-datums 1 2 3 4. + * The check below that p->size == 1 also + * incidentally rejects that non-standard + * JSON. + */ + if (p->type != JSMN_STRING || p->size != 1) { + *valid = false; + return p; + } + + ++p; + return validate_jsmn_datum(p, end, valid); +} + +/** validate_jsmn_parse_output + * + * @brief Validates the result of jsmn_parse. + * + * @desc LIBJMSN is a small fast library, not a + * comprehensive library. + * + * This simply means that LIBJSMN will accept a + * *lot* of very strange text that is technically + * not JSON. + * + * For example, LIBJSMN would accept the strings + * `{"1" "2" "3" "4"}` and `["key": 1 2 3 4]` as valid. + * + * This can lead to strange sequences of jsmntok_t + * objects. + * Unfortunately, most of our code assumes that + * the data fed into our JSON-RPC interface is + * valid JSON, and in particular is not invalid + * JSON that tickles LIBJSMN into emitting + * strange sequences of `jsmntok_t`. + * + * This function detects such possible problems + * and returns false if such an issue is found. + * If so, it is probably unsafe to pass the + * `jsmntok_t` generated by LIBJSMN to any other + * parts of our code. + * + * @param p - The first jsmntok_t token to process. + * This function does not assume that semantically + * only one JSON datum is processed; it does expect + * a sequence of complete JSON datums (which is + * what LIBJSMN *should* output). + * @param end - One past the end of jsmntok_t. + * Basically, this function is assured to read tokens + * starting at p up to end - 1. + * If p >= end, this will not validate anything and + * trivially return true. + * + * @return true if there appears to be no problem + * with the jsmntok_t sequence outputted by + * `jsmn_parse`, false otherwise. + */ +static bool +validate_jsmn_parse_output(const jsmntok_t *p, const jsmntok_t *end) +{ + bool valid = true; + + while (p < end && valid) + p = validate_jsmn_datum(p, end, &valid); + + return valid; +} + +/*----------------------------------------------------------------------------- +JSMN Result Validation Ends +-----------------------------------------------------------------------------*/ + jsmntok_t *json_parse_input(const tal_t *ctx, const char *input, int len, bool *valid) { @@ -338,7 +567,7 @@ again: ret = json_next(toks) - toks; /* Cut to length and return. */ - *valid = true; + *valid = validate_jsmn_parse_output(toks, toks + ret); tal_resize(&toks, ret + 1); /* Make sure last one is always referenceable. */ toks[ret].type = -1; diff --git a/common/test/run-json.c b/common/test/run-json.c index 04e583e7d..adb88610b 100644 --- a/common/test/run-json.c +++ b/common/test/run-json.c @@ -122,20 +122,25 @@ static void test_json_tok_size(void) assert(toks[0].size == 2); assert(toks[1].size == 2); - buf = "{\"e1\" : {\"e2\", \"e3\"}}"; + buf = "{\"e1\" : {\"e2\": 2, \"e3\": 3}}"; toks = json_parse_input(tmpctx, buf, strlen(buf), &ok); assert(ok); /* size only counts *direct* children */ assert(toks[0].size == 1); assert(toks[2].size == 2); - buf = "{\"e1\" : {\"e2\", \"e3\"}, \"e4\" : {\"e5\", \"e6\"}}"; + buf = "{\"e1\" : {\"e2\": 2, \"e3\": 3}, \"e4\" : {\"e5\": 5, \"e6\": 6}}"; toks = json_parse_input(tmpctx, buf, strlen(buf), &ok); assert(ok); /* size only counts *direct* children */ assert(toks[0].size == 2); assert(toks[2].size == 2); - assert(toks[6].size == 2); + assert(toks[8].size == 2); + + /* This should *not* parse! (used to give toks[0]->size == 3!) */ + buf = "{ \"\" \"\" \"\" }"; + toks = json_parse_input(tmpctx, buf, strlen(buf), &ok); + assert(!ok); /* This should *not* parse! (used to give toks[0]->size == 2!) */ buf = "{ 'satoshi', '546' }";