msggen: Parse JSON-RPC schemas and build the in-memory model

We build an in-memory model of what the API should look like, which
will later be used to generate a variety of bindings. In this PR we
will use the model to build structs corresponding to the requests and
responses for the various methods.

The JSON-RPC schemas serve as ground truth; however, they are missing a
bit of context: the methods and the request-response matching (as well
as a higher-level grouping we'll call a Service). I'm tempted to create
a new document that describes this behavior, from which we could even
generate the rather repetitive JSON schemas. Furthermore, it would
allow us to add some required metadata, such as grpc field numbering,
once we generate those bindings.

Changelog-Added: JSON-RPC: A new `msggen` library allows easy generation of language bindings for the JSON-RPC from the JSON schemas
This commit is contained in:
Christian Decker 2022-01-14 13:49:56 +01:00
parent 95eb868047
commit 0fc0ffc961
4 changed files with 342 additions and 0 deletions

23
contrib/msggen/README.md Normal file
View File

@ -0,0 +1,23 @@
# MsgGen - Generating language bindings and docs from schemas and wire descriptions
MsgGen is a collection of tools that are used to parse schemas and
(eventually) protocol wire CSVs into an intermediate representation in
memory, and then generate language specific bindings and
documentation from it.
The dependency graph looks like this:
```dot
digraph {
"JSON-RPC Schemas" -> "msggen model";
"msggen model" -> "grpc proto file";
"msggen model" -> "Rust From<JsonRpc> Converters";
"grpc proto file" -> "Rust grpc bindings";
"Rust grpc bindings" -> "cln-grpc";
"Rust From<JsonRpc> Converters" -> "cln-grpc";
"msggen model" -> "Rust JSON-RPC structs";
"Rust JSON-RPC structs" -> "cln-rpc";
}
```

View File

View File

@ -0,0 +1,300 @@
from typing import List, Union, Optional
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def path2type(path):
    """Turn a dotted field path into a type name.

    Array markers (``[]``) are removed, the path is split on ``.`` and
    every segment is run through ``str.capitalize`` before the pieces
    are concatenated.
    """
    segments = path.replace("[]", "").split(".")
    return "".join(map(str.capitalize, segments))
class FieldName:
    """Wrapper around a raw field name that can produce a normalized form."""

    def __init__(self, name):
        self.name = name

    def normalized(self):
        """Return the name with aliases applied and separators replaced.

        ``type`` is renamed to ``item_type``; spaces and dashes are
        turned into underscores.
        """
        aliases = {"type": "item_type"}
        renamed = aliases.get(self.name, self.name)
        without_spaces = renamed.replace(' ', '_')
        return without_spaces.replace('-', '_')

    def __str__(self):
        return self.name
class Field:
    """Base class for model nodes, identified by a dotted path."""

    def __init__(self, path, description):
        self.path = path
        self.description = description
        # Every field starts out as not required; the flag is plain
        # instance state and can be updated after construction.
        self.required = False

    @property
    def name(self):
        """The last ``.``-separated segment of the path, as a FieldName."""
        leaf = self.path.rsplit(".", 1)[-1]
        return FieldName(leaf)

    def __str__(self):
        return f"Field[path={self.path}, required={self.required}]"

    def __repr__(self):
        # The debug representation is the same as the display string.
        return str(self)

    def normalized(self):
        """Normalized form of this field's name."""
        field_name = self.name
        return field_name.normalized()
class Service:
    """Top level class that wraps all the RPC methods.
    """

    def __init__(self, name: str, methods=None):
        self.name = name
        self.methods = methods if methods is not None else []

        # If we require linking with some external files we'll add
        # them here so the generator can use them.
        self.includes: List[str] = []

    def gather_types(self):
        """Gather all types that might need to be defined.

        For every method the request and response come first, followed
        by the flattened fields of the request and then of the response.
        """

        def gather_subfields(field: Field) -> List[Field]:
            # Composite fields contribute themselves plus everything
            # nested below them.
            if isinstance(field, CompositeField):
                collected = [field]
                for child in field.fields:
                    collected.extend(gather_subfields(child))
                return collected

            # Arrays are not listed themselves, only what they contain.
            if isinstance(field, ArrayField):
                return list(gather_subfields(field.itemtype))

            return [field]

        types = []
        for method in self.methods:
            types.append(method.request)
            types.append(method.response)
            for message in (method.request, method.response):
                for field in message.fields:
                    types.extend(gather_subfields(field))
        return types
class Method:
    """A single RPC method: its name plus the request and response models."""

    def __init__(self, name: str, request: Field, response: Field):
        self.name, self.request, self.response = name, request, response
class CompositeField(Field):
    """A JSON object: a named type composed of nested fields."""

    def __init__(self, typename, fields, path, description):
        Field.__init__(self, path, description)
        self.typename = typename
        self.fields = fields

    @staticmethod
    def _conditional_properties(js):
        """Yield ``(name, schema)`` pairs declared in conditional branches.

        The top-level ``then``/``else`` branches are visited first,
        followed by the ``then``/``else`` branches of each ``allOf``
        entry. Branches whose ``properties`` is not a dict are skipped.
        """
        # Reformat the outer conditions so they look like an `allOf` entry.
        top = {"then": js.get("then", {}), "else": js.get("else", {})}
        for clause in [top] + js.get("allOf", []):
            for branch in ("then", "else"):
                props = clause.get(branch, {}).get("properties", None)
                if isinstance(props, dict):
                    yield from props.items()

    @classmethod
    def from_js(cls, js, path):
        """Build a CompositeField from the object schema `js` at `path`.

        Conditional properties are flattened into the unconditional ones
        (the first definition of a name wins). Deprecated, untyped and
        unmapped properties are skipped with a warning. The schema
        passed in is not modified.
        """
        typename = path2type(path)

        # Work on a copy so that flattening the conditional properties
        # does not leak into the caller's schema. An object without any
        # `properties` simply yields no unconditional fields.
        properties = dict(js.get("properties", {}))
        for key, schema in cls._conditional_properties(js):
            properties.setdefault(key, schema)

        # Identify required fields
        required = js.get("required", [])
        fields = []
        for fname, ftype in properties.items():
            fpath = f"{path}.{fname}"
            desc = ftype.get("description", "")

            if ftype.get("deprecated", False):
                logger.warning(f"Unmanaged {fpath}, it is deprecated")
                continue

            if "type" not in ftype:
                logger.warning(f"Unmanaged {fpath}, it doesn't have a type")
                continue

            # TODO Remove the `['string', 'null']` match once
            # `listpeers.peers[].channels[].closer` no longer has this
            # type
            type_name = ftype["type"]
            if type_name == ["string", "null"]:
                type_name = "string"

            # Peek into the type so we know how to decode it
            field = None
            if type_name == "string" and "enum" in ftype:
                field = EnumField.from_js(ftype, fpath)
            elif type_name == "object":
                field = CompositeField.from_js(ftype, fpath)
            elif type_name == "array":
                field = ArrayField.from_js(fpath, ftype)
            elif type_name in PrimitiveField.types:
                field = PrimitiveField(type_name, fpath, desc)
            else:
                # Report the property's own path, like the warnings above.
                logger.warning(
                    f"Unmanaged {fpath}, type {ftype} is not mapped in the object model"
                )

            if field is not None:
                field.required = fname in required
                fields.append(field)
                logger.debug(field)

        return CompositeField(typename, fields, path, js.get("description", ""))

    def __str__(self):
        fieldnames = ",".join([f.path.split(".")[-1] for f in self.fields])
        return f"CompositeField[name={self.path}, fields=[{fieldnames}]]"
class EnumVariant(Field):
    """A variant of an enum with helpers for normalization of the display.
    """

    def __init__(self, variant: Optional[str]):
        self.variant = variant

    def __str__(self):
        return self.variant

    def normalized(self):
        """Upper-cased variant with spaces and dashes mapped to underscores."""
        separators_to_underscore = str.maketrans(" -", "__")
        underscored = self.variant.translate(separators_to_underscore)
        return underscored.upper()
class EnumField(Field):
    """A field whose value is one of a fixed set of strings."""

    def __init__(self, typename, values, path, description):
        Field.__init__(self, path, description)
        self.typename = typename
        # Materialize the values: a one-shot iterator would be exhausted
        # by building `variants` below, leaving `self.values` empty for
        # every later reader (including `__str__`).
        self.values = list(values)
        self.variants = [EnumVariant(v) for v in self.values]

    @classmethod
    def from_js(cls, js, path):
        """Build an EnumField from the schema `js` located at `path`.

        ``None`` entries in ``js["enum"]`` are dropped.
        """
        # Transform the path into something that is a valid TypeName
        typename = path2type(path)
        return EnumField(
            typename,
            values=[v for v in js["enum"] if v is not None],
            path=path,
            description=js.get("description", ""),
        )

    def __str__(self):
        values = ",".join([v for v in self.values if v is not None])
        return f"Enum[path={self.path}, required={self.required}, values=[{values}]]"
class PrimitiveField(Field):
    """A leaf field carrying one of the primitive type names below."""

    # Leaf types that we expect the binding languages to provide
    types = [
        "boolean",
        "u32",
        "u64",
        "u8",
        "string",
        "pubkey",
        "signature",
        "msat",
        "hex",
        "short_channel_id",
        "txid",
        "integer",
        "u16",
        "number",
    ]

    def __init__(self, typename, path, description):
        super().__init__(path, description)
        self.typename = typename

    def __str__(self):
        return (
            f"Primitive[path={self.path}, "
            f"required={self.required}, "
            f"type={self.typename}]"
        )
class ArrayField(Field):
    """A (possibly nested) array of a single item type."""

    def __init__(self, itemtype, dims, path, description):
        Field.__init__(self, path, description)
        self.itemtype = itemtype
        self.dims = dims

    @classmethod
    def from_js(cls, path, js):
        """Build an ArrayField from the array schema `js` located at `path`.

        Nested arrays are collapsed into a single ArrayField whose
        `dims` counts the nesting depth; one ``[]`` per dimension is
        appended to `path`, and the item type shares that path.

        Raises:
            ValueError: if the innermost item type is not an object, a
                string enum or one of `PrimitiveField.types`.
        """
        # Determine how nested we are
        dims = 1
        child_js = js["items"]
        while child_js["type"] == "array":
            dims += 1
            child_js = child_js["items"]

        path += "[]" * dims
        child_type = child_js["type"]
        if child_type == "object":
            itemtype = CompositeField.from_js(child_js, path)
        elif child_type == "string" and "enum" in child_js:
            itemtype = EnumField.from_js(child_js, path)
        elif child_type in PrimitiveField.types:
            itemtype = PrimitiveField(
                child_type, path, child_js.get("description", "")
            )
        else:
            # Fail with a descriptive error instead of tripping over an
            # unbound `itemtype` further down.
            raise ValueError(
                f"Unmanaged {path}, item type {child_type!r} is not mapped "
                "in the object model"
            )

        logger.debug(f"Array path={path} dims={dims}, type={itemtype}")
        return ArrayField(
            itemtype, dims=dims, path=path, description=js.get("description", "")
        )

    def normalized(self):
        # Strip the '[]' that we use to signal an array. The name
        # itself doesn't need this. Nested arrays carry one marker per
        # dimension, so keep stripping until none is left.
        name = Field.normalized(self)
        while name.endswith("[]"):
            name = name[:-2]
        return name
class Command:
    """A named command together with a flat list of fields."""

    def __init__(self, name, fields):
        self.name = name
        self.fields = fields

    def __str__(self):
        # Only the last path segment of each field is shown.
        leaves = []
        for member in self.fields:
            leaves.append(member.path.rsplit(".", 1)[-1])
        fieldnames = ",".join(leaves)
        return f"Command[name={self.name}, fields=[{fieldnames}]]"
def parse_doc(command, js) -> Union[CompositeField, Command]:
    """Given a command name and its schema, generate the IR model"""
    # All our top-level wrappers are objects, right?
    assert js["type"] in ["object", "string"]

    if js["type"] != "string":
        # The command name doubles as the root path of the model.
        return CompositeField.from_js(js, command)

    # Special case: stop just returns a string
    return Command(command.capitalize(), [])

View File

@ -0,0 +1,19 @@
[tool.poetry]
name = "msggen"
version = "0.1.0"
description = "A utility to transform wire messages and JSON-RPC messages to arbitrary target languages."
authors = ["Christian Decker <decker@blockstream.com>"]
license = "BSD-MIT"
[tool.poetry.dependencies]
python = "^3.6"
[tool.poetry.dev-dependencies]
pytest = "^6.2.5"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
msggen = 'msggen.__main__:run'