rgb-cln/tools/fromschema.py

#! /usr/bin/env python3
# Script to turn JSON schema into markdown documentation and replace in-place.
# Released by Rusty Russell under CC0:
# https://creativecommons.org/publicdomain/zero/1.0/
from argparse import ArgumentParser
import json
import re


def esc_underscores(s):
    """Backslash-escape underscores outside of backtick-enclosed spans.

    Backtick-enclosed spans and existing backslash escapes (a backslash
    followed by any character other than a newline) are copied through
    untouched, so already-escaped text is not escaped twice.  Every other
    character is preserved as-is; only bare underscores are rewritten to
    ``\\_``.
    """
    def _replace(match):
        protected = match.group(1)
        # Group 1 is a backtick span or an existing escape: keep verbatim.
        # Otherwise the match is a bare underscore, which we escape.
        return protected if protected is not None else '\\_'

    # re.sub leaves unmatched text alone, so stray characters such as an
    # unterminated backtick or a trailing backslash are never dropped.
    return re.sub(r'(`(?:[^`\\]|\\.)*`|\\.)|_', _replace, s)


def json_value(obj):
    """Render obj as markdown text resembling its JSON spelling.

    None becomes *null*, booleans become *true* / *false*, and strings are
    double-quoted with their underscores escaped.  Any other type is not
    supported and trips the assertion.
    """
    if obj is None:
        return '*null*'
    if type(obj) is bool:
        return '*true*' if obj else '*false*'
    if type(obj) is str:
        return '"' + esc_underscores(obj) + '"'
    # Only null, booleans and strings are handled.
    assert False


def outputs(lines):
    """Write all of these strings to the final output, concatenated.

    Nothing is inserted between the strings and no newline is appended.
    """
    text = ''.join(lines)
    print(text, end='')


def output(line):
    """Write this text to the final output, without a trailing newline"""
    print(line, sep='', end='')


def output_type(properties, is_optional):
    """Output the parenthesised type annotation for a member.

    Arrays also name their element type ("array of Xs"), and optional
    members get an ", optional" suffix.
    """
    described = esc_underscores(properties['type'])
    pieces = [described]
    if described == 'array':
        element = esc_underscores(properties['items']['type'])
        pieces.append(' of {}s'.format(element))
    if is_optional:
        pieces.append(", optional")
    output(" ({})".format(''.join(pieces)))


def output_range(properties):
    """Output the value constraints declared on a member, if any.

    Handles three independent groups, each printed as a parenthesised
    suffix: numeric bounds (minimum / maximum), string length bounds
    (minLength / maxLength) and enum values.
    """
    # Test each key explicitly: `'maximum' and 'minimum' in properties`
    # only checks for 'minimum', because the non-empty string literal is
    # always truthy.
    has_min = 'minimum' in properties
    has_max = 'maximum' in properties
    if has_min and has_max:
        output(" ({} to {} inclusive)".format(properties['minimum'],
                                              properties['maximum']))
    elif has_max:
        output(" (max {})".format(properties['maximum']))
    elif has_min:
        output(" (min {})".format(properties['minimum']))

    has_min_len = 'minLength' in properties
    has_max_len = 'maxLength' in properties
    if has_min_len and has_max_len:
        if properties['minLength'] == properties['maxLength']:
            output(' (always {} characters)'.format(properties['minLength']))
        else:
            output(' ({} to {} characters)'.format(properties['minLength'],
                                                   properties['maxLength']))
    elif has_max_len:
        output(' (up to {} characters)'.format(properties['maxLength']))
    elif has_min_len:
        output(' (at least {} characters)'.format(properties['minLength']))

    if 'enum' in properties:
        if len(properties['enum']) == 1:
            output(" (always {})".format(json_value(properties['enum'][0])))
        else:
            output(' (one of {})'.format(', '.join([json_value(p) for p in properties['enum']])))


def fmt_propname(propname):
    """Return the property name in markdown bold, with underscores escaped"""
    escaped = esc_underscores(propname)
    return '**{}**'.format(escaped)


def deprecated_to_deleted(vername):
    """Return the version in which something deprecated in vername is removed.

    We promise a 6 month minimum deprecation period, and versions are every
    3 months.  The result is the first two numeric components of vername
    with nine added to the second one, carrying into the first when it
    passes 12.  v0.12 is treated as v22.08 before the offset is applied.
    """
    assert vername.startswith('v')
    numbers = [int(field) for field in vername[1:].split('.')[:2]]
    if numbers == [0, 12]:
        numbers = [22, 8]

    major, minor = numbers[0], numbers[1] + 9
    if minor > 12:
        major, minor = major + 1, minor - 12

    # Christian points out versions should sort well lexicographically,
    # so we zero-pad single-digits.
    return 'v{}.{:0>2}'.format(major, minor)


def output_member(propname, properties, is_optional, indent, print_type=True, prefix=None):
    """Generate description line(s) for this member.

    propname: name of the member; only used to build the default prefix.
    properties: the schema dict describing the member.
    is_optional: passed on to output_type() for the ", optional" suffix.
    indent: whitespace placed in front of the line and deepened for children.
    print_type: when False, the type annotation is omitted.
    prefix: text that starts the line; defaults to "- **propname**".
    """
    lead = prefix if prefix is not None else '- ' + fmt_propname(propname)
    output(indent + lead)

    # We make them explicitly note if they don't want a type!
    typed = 'untyped' not in properties

    if typed and print_type:
        output_type(properties, is_optional)

    if 'description' in properties:
        output(": {}".format(esc_underscores(properties['description'])))

    output_range(properties)

    if 'deprecated' in properties:
        removal = deprecated_to_deleted(properties['deprecated'])
        output(" **deprecated, removal in {}**".format(removal))
    if 'added' in properties:
        output(" *(added {})*".format(properties['added']))

    # Untyped members have no children to descend into.
    if not typed:
        output('\n')
        return

    # Objects and arrays continue on further-indented lines below this one.
    member_type = properties['type']
    if member_type == 'object':
        output(':\n')
        output_members(properties, indent + '  ')
    elif member_type == 'array':
        output(':\n')
        output_array(properties['items'], indent + '  ')
    else:
        output('\n')


def output_array(items, indent):
    """Describe the elements of an array.

    The caller has already said it's an array of {type}.  Arrays nested
    inside arrays get one description line per level, each indented two
    spaces further; object elements are handed to output_members(); any
    other element type gets a single description line plus its range.
    """
    while True:
        kind = items['type']
        if kind == 'object':
            output_members(items, indent)
            return
        if kind != 'array':
            break
        # Nested array: describe this level, then step into its elements.
        output(indent + '- {}:\n'.format(esc_underscores(items['description'])))
        items = items['items']
        indent = indent + '  '

    output(indent + '- {}'.format(esc_underscores(items['description'])))
    output_range(items)
    output('\n')


def has_members(sub):
    """Does this sub have any properties to print?

    Empty stub properties and properties marked `"deprecated": true` do
    not count.  A sub with no 'properties' key at all (e.g. a clause that
    only lists 'required') has nothing to print, so it yields False
    rather than raising KeyError.
    """
    props = sub.get('properties', {})
    return any(len(member) != 0 and member.get('deprecated') is not True
               for member in props.values())


def output_members(sub, indent=''):
    """Generate lines for these properties.

    Required members are listed first, then optional ones, then any
    members whose name starts with 'warning'.  Conditional members
    ('if' / 'then', optionally wrapped in 'allOf') follow, each group
    introduced by an "If ...:" sentence.

    Note that sub['properties'] is modified in place: stub (empty) entries
    and entries with `"deprecated": true` are deleted.
    """
    props = sub['properties']

    # Remove deprecated: True and stub properties
    # (Stubs required to keep additionalProperties: false happy)

    # FIXME: It fails for schemas which have only an array type with
    # no properties, ex:
    # "abcd": {
    #  "type": "array",
    #   "items": {
    #    "type": "whatever",
    #    "description": "efgh"
    #   }
    # }
    # Checkout the schema of `staticbackup`.
    for name in list(props):
        entry = props[name]
        if len(entry) == 0 or entry.get('deprecated') is True:
            del props[name]

    # Whatever survived is split into warnings and regular members.
    warning_names = [name for name in props if name.startswith('warning')]
    regular_names = [name for name in props if not name.startswith('warning')]
    required = sub.get('required', ())

    # First list always-present properties
    for name in regular_names:
        if name in required:
            output_member(name, props[name], False, indent)

    # ... then the optional ones.
    for name in regular_names:
        if name not in required:
            output_member(name, props[name], True, indent)

    if warning_names:
        output(indent + "- the following warnings are possible:\n")
        for name in warning_names:
            output_member(name, props[name], False, indent + '  ', print_type=False)

    # Not handled.
    assert 'oneOf' not in sub

    # If we have multiple ifs, we have to wrap them in allOf.
    if 'allOf' in sub:
        clauses = sub['allOf']
    elif 'if' in sub:
        clauses = [sub]
    else:
        clauses = []

    # We partially handle if, assuming it depends on particular values of prior properties.
    for clause in clauses:
        condition = clause['if']

        # "required" are fields that simply must be present
        parts = [fmt_propname(field) + ' is present'
                 for field in condition.get('required', [])]

        # "properties" are enums of field values
        for tag, vals in condition.get('properties', {}).items():
            # Don't have a description field here, it's not used.
            assert 'description' not in vals
            rendered = [json_value(choice) for choice in vals['enum']]

            if len(rendered) == 1:
                listing = " {}".format(rendered[0])
            else:
                listing = " {} or {}".format(", ".join(rendered[:-1]),
                                             rendered[-1])
            parts.append(fmt_propname(tag) + " is" + listing)

        sentence = indent + "If " + ", and ".join(parts) + ":\n\n"

        then_part = clause['then']
        if has_members(then_part):
            # Prefix with blank line.
            outputs(['\n', sentence])

            output_members(then_part, indent + '  ')


def generate_from_schema(schema):
    """Output the markdown description of a command's return value.

    This is not general, but works for us.  A non-object schema is
    described as a single element.  For an object schema, the intro
    sentence depends on how many top-level properties there are and on
    their type, the members are listed through output_members(), and
    top-level properties whose name starts with 'warning' are listed in a
    separate trailing section.

    Note that schema['properties'] is modified in place: the top-level
    warning properties are removed from it.
    """
    if schema['type'] != 'object':
        # 'stop' returns a single string!
        output_member(None, schema, False, '', prefix='On success, returns a single element')
        return

    toplevels = []
    warnings = []
    props = schema['properties']

    # We handle warnings on top-level objects with a separate section,
    # so collect them now and remove them
    for toplevel in list(props.keys()):
        if toplevel.startswith('warning'):
            warnings.append((toplevel, props[toplevel]['description']))
            del props[toplevel]
        else:
            toplevels.append(toplevel)

    # No properties -> empty object.
    if toplevels == []:
        output('On success, an empty object is returned.\n')
        sub = schema
    elif len(toplevels) == 1 and props[toplevels[0]]['type'] == 'object':
        output('On success, an object containing {} is returned.  It is an object containing:\n\n'.format(fmt_propname(toplevels[0])))
        # Don't have a description field here, it's not used.
        # NOTE(review): toplevels[0] is the property *name*, so this is a
        # substring test on the name, not a check of the property's schema.
        # Presumably props[toplevels[0]] was intended; confirm that no
        # existing schema would start failing before changing it.
        assert 'description' not in toplevels[0]
        sub = props[toplevels[0]]
    elif len(toplevels) == 1 and props[toplevels[0]]['type'] == 'array' and props[toplevels[0]]['items']['type'] == 'object':
        output('On success, an object containing {} is returned.  It is an array of objects, where each object contains:\n\n'.format(fmt_propname(toplevels[0])))
        # Don't have a description field here, it's not used.
        # NOTE(review): same name-vs-schema caveat as the assert above.
        assert 'description' not in toplevels[0]
        sub = props[toplevels[0]]['items']
    else:
        output('On success, an object is returned, containing:\n\n')
        sub = schema

    output_members(sub)

    if warnings:
        outputs(['\n', 'The following warnings may also be returned:\n\n'])
        for w, desc in warnings:
            # Escape underscores like every other description we print, so
            # they are not rendered as markdown emphasis.
            output("- {}: {}\n".format(fmt_propname(w), esc_underscores(desc)))

    # GH markdown rendering gets upset if there isn't a blank line
    # between a list and the end comment.
    output('\n')


def main(schemafile, markdownfile):
    """Print documentation generated from schemafile.

    With no markdownfile, only the generated text is printed.  Otherwise
    the markdown file is echoed line by line; the generated text is
    inserted right after each start-marker line, and the existing lines up
    to (but not including) the end-marker line are dropped.
    """
    start_marker = '[comment]: # (GENERATE-FROM-SCHEMA-START)\n'
    end_marker = '[comment]: # (GENERATE-FROM-SCHEMA-END)\n'

    def load_schema():
        # The schema is only read when it is actually needed.
        with open(schemafile, "r") as handle:
            return json.load(handle)

    if markdownfile is None:
        generate_from_schema(load_schema())
        return

    with open(markdownfile, "r") as handle:
        md_lines = handle.readlines()

    skipping = False
    for md_line in md_lines:
        # The end marker itself is echoed, so stop skipping before printing.
        if md_line == end_marker:
            skipping = False

        if not skipping:
            print(md_line, end='')

        # The start marker is echoed first, then replaced content follows.
        if md_line == start_marker:
            generate_from_schema(load_schema())
            skipping = True


if __name__ == "__main__":
    # Command-line entry point: one positional schema file, plus an
    # optional markdown file whose marked section gets regenerated.
    cli = ArgumentParser()
    cli.add_argument('schemafile', help='The schema file to use')
    cli.add_argument('--markdownfile', help='The markdown file to read')
    options = cli.parse_args()

    main(options.schemafile, options.markdownfile)