Files
qemu/scripts/qapi/common.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

258 lines
7.7 KiB
Python
Raw Permalink Normal View History

#
# QAPI helper library
#
# Copyright IBM, Corp. 2011
# Copyright (c) 2013-2018 Red Hat Inc.
#
# Authors:
# Anthony Liguori <aliguori@us.ibm.com>
qapi.py: Restructure lexer and parser The parser has a rather unorthodox structure: Until EOF: Read a section: Generator function get_expr() yields one section after the other, as a string. An unindented, non-empty line that isn't a comment starts a new section. Lexing: Split section into a list of tokens (strings), with help of generator function tokenize(). Parsing: Parse the first expression from the list of tokens, with parse(), throw away any remaining tokens. In parse_schema(): record value of an enum, union or struct key (if any) in the appropriate global table, append expression to the list of expressions. Return list of expressions. Known issues: (1) Indentation is significant, unlike in real JSON. (2) Neither lexer nor parser have any idea of source positions. Error reporting is hard, let's go shopping. (3) The one error we bother to detect, we "report" via raise. (4) The lexer silently ignores invalid characters. (5) If everything in a section gets ignored, the parser crashes. (6) The lexer treats a string containing a structural character exactly like the structural character. (7) Tokens trailing the first expression in a section are silently ignored. (8) The parser accepts any token in place of a colon. (9) The parser treats comma as optional. (10) parse() crashes on unexpected EOF. (11) parse_schema() crashes when a section's expression isn't a JSON object. Replace this piece of original art by a thoroughly unoriginal design. Takes care of (1), (2), (5), (6) and (7), and lays the groundwork for addressing the others. Generated source files remain unchanged. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Message-id: 1374939721-7876-4-git-send-email-armbru@redhat.com Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
2013-07-27 17:41:55 +02:00
# Markus Armbruster <armbru@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2.
# See the COPYING file in the top-level directory.
import re
from typing import (
Any,
Dict,
Match,
Optional,
Sequence,
Union,
)
#: Magic string that gets removed along with all space to its right.
EATSPACE = '\033EATSPACE.'
#: The text ' *' followed by `EATSPACE`, so that any spaces following
#: it are removed when `cgen` strips `EATSPACE`.
#: NOTE(review): presumably appended to C type names to form pointer
#: declarations -- confirm against the users of this constant.
POINTER_SUFFIX = ' *' + EATSPACE
def camel_to_upper(value: str) -> str:
    """
    Convert a CamelCase name to UPPER_CASE with '_' between words.

    Two rules decide where '_' goes; any character for which
    ``str.isupper()`` is false (lower case letters, digits, '_')
    counts as "not upper case":

    1. Where a not-upper-case character is followed by an upper case
       one, '_' is inserted between them, unless the preceding
       character is already non-alphanumeric.
    2. Where a run of upper case characters is followed by a
       not-upper-case one, '_' is inserted before the last character
       of the run, unless fewer than three characters have been copied
       so far or the character before it is already non-alphanumeric.

    The result is upper-cased, passed through `c_name`, and stripped
    of leading underscores.

    Examples::

        OneTwo        -> ONE_TWO
        One2Three     -> ONE2_THREE
        AbCd          -> AB_CD
        ENUMName      -> ENUM_NAME
        EnumName1     -> ENUM_NAME1
        ACRONYMWord   -> ACRONYM_WORD
        DisplayGLMode -> DISPLAY_GL_MODE

    :param value: The name to convert; must not be empty.
    :return: The converted name.
    """
    out = value[0]
    prev_upper = out.isupper()

    for ch in value[1:]:
        cur_upper = ch.isupper()
        if prev_upper and not cur_upper:
            # An upper case run just ended: its last character starts
            # the next word, so split in front of it unless a
            # separator is already there or we are at the beginning.
            if len(out) > 2 and out[-2].isalnum():
                out = out[:-1] + '_' + out[-1]
        elif cur_upper and not prev_upper:
            # A new word starts with this upper case character:
            # separate it from what precedes it.
            if out[-1].isalnum():
                out += '_'
        out += ch
        prev_upper = cur_upper

    return c_name(out.upper()).lstrip('_')
def c_enum_const(type_name: str,
                 const_name: str,
                 prefix: Optional[str] = None) -> str:
    """
    Build the C name of an enumeration constant.

    :param type_name: The name of the enumeration; converted with
                      `camel_to_upper` to form the default prefix.
    :param const_name: The name of this constant; mapped with
                       `c_name` (unprotected) and upper-cased.
    :param prefix: Optional, prefix used instead of the one derived
                   from ``type_name``.
    :return: The prefix and the constant part joined by '_'.
    """
    stem = camel_to_upper(type_name) if prefix is None else prefix
    member = c_name(const_name, False).upper()
    return stem + '_' + member
#: Identifiers `c_name` must not return verbatim when protecting.
#: Built once at import time instead of on every `c_name` call.
_C_NAME_RESERVED_WORDS = frozenset({
    # ANSI X3J11/88-090, 3.1.1
    'auto', 'break', 'case', 'char', 'const', 'continue',
    'default', 'do', 'double', 'else', 'enum', 'extern',
    'float', 'for', 'goto', 'if', 'int', 'long', 'register',
    'return', 'short', 'signed', 'sizeof', 'static',
    'struct', 'switch', 'typedef', 'union', 'unsigned',
    'void', 'volatile', 'while',
    # ISO/IEC 9899:1999, 6.4.1
    'inline', 'restrict', '_Bool', '_Complex', '_Imaginary',
    # ISO/IEC 9899:2011, 6.4.1
    '_Alignas', '_Alignof', '_Atomic', '_Generic',
    '_Noreturn', '_Static_assert', '_Thread_local',
    # GCC http://gcc.gnu.org/onlinedocs/gcc-4.7.1/gcc/C-Extensions.html
    # excluding _.*
    'asm', 'typeof',
    # C++ ISO/IEC 14882:2003 2.11
    'bool', 'catch', 'class', 'const_cast', 'delete',
    'dynamic_cast', 'explicit', 'false', 'friend', 'mutable',
    'namespace', 'new', 'operator', 'private', 'protected',
    'public', 'reinterpret_cast', 'static_cast', 'template',
    'this', 'throw', 'true', 'try', 'typeid', 'typename',
    'using', 'virtual', 'wchar_t',
    # alternative representations
    'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not',
    'not_eq', 'or', 'or_eq', 'xor', 'xor_eq',
    # namespace pollution:
    'unix', 'errno', 'mips', 'sparc', 'i386', 'linux',
})


def c_name(name: str, protect: bool = True) -> str:
    """
    Map ``name`` to a valid C identifier.

    Used for converting 'name' from a 'name':'type' qapi definition
    into a generated struct member, as well as converting type names
    into substrings of a generated C function name.

    Every character other than ASCII letters, digits and '_' is
    replaced by '_'.  With ``protect`` set, a result that is a
    reserved word or starts with a digit gets ``q_`` prepended.

    '__a.b_c' -> '__a_b_c', 'x-foo' -> 'x_foo'
    protect=True: 'int' -> 'q_int'; protect=False: 'int' -> 'int'

    :param name: The name to map.  An empty name is returned unchanged.
    :param protect: If true, avoid returning certain ticklish identifiers
                    (like C keywords) by prepending ``q_``.
    :return: The mapped identifier.
    """
    name = re.sub(r'[^A-Za-z0-9_]', '_', name)
    # name[:1] rather than name[0]: an empty name must not raise.
    if protect and (name in _C_NAME_RESERVED_WORDS
                    or name[:1].isdigit()):
        return 'q_' + name
    return name
class Indentation:
    """
    Track an indentation width, measured in spaces.

    :param initial: Initial number of spaces, default 0.
    """

    def __init__(self, initial: int = 0) -> None:
        self._level = initial

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self._level:d})"

    def __str__(self) -> str:
        """Render the current indentation as that many spaces."""
        return self._level * ' '

    def increase(self, amount: int = 4) -> None:
        """Widen the indentation by ``amount`` spaces, default 4."""
        self._level = self._level + amount

    def decrease(self, amount: int = 4) -> None:
        """
        Narrow the indentation by ``amount`` spaces, default 4.

        Asserts that the level does not drop below zero.
        """
        assert self._level >= amount
        self._level = self._level - amount
#: Global, current indent level for code generation.
#: `cgen` reads it to prefix the lines it generates.
indent = Indentation()
def cgen(code: str, **kwds: object) -> str:
    """
    Generate ``code`` with ``kwds`` interpolated.

    Obey `indent`, and strip `EATSPACE`.

    :param code: A ``%``-format template using ``%(key)s`` style keys.
    :param kwds: The values to interpolate into ``code``.
    :return: The interpolated text.  When `indent` is non-empty, every
             line that is neither empty nor starts with '#' is prefixed
             with it.  Each `EATSPACE` and the spaces right after it
             are removed.
    """
    text = code % kwds
    prefix = str(indent)
    if prefix:
        # Zero-width match at the start of each line to be indented.
        line_start = re.compile(r'^(?!(#|$))', flags=re.MULTILINE)
        text = line_start.sub(prefix, text)
    eat = re.compile(re.escape(EATSPACE) + r' *')
    return eat.sub('', text)
def mcgen(code: str, **kwds: object) -> str:
    """
    Generate ``code`` like `cgen`, after dropping one leading newline.

    This lets a template be written as a triple-quoted string whose
    text starts on the line after the opening quotes.

    :param code: A ``%``-format template; may be empty.
    :param kwds: The values to interpolate into ``code``.
    :return: The result of `cgen` on the trimmed template.
    """
    # startswith() rather than code[0]: an empty template must not raise.
    if code.startswith('\n'):
        code = code[1:]
    return cgen(code, **kwds)
def c_fname(filename: str) -> str:
    """
    Map ``filename`` to text usable inside a C identifier.

    Every character other than ASCII letters, digits and '_' is
    replaced by '_'.

    :param filename: The file name to map.
    :return: The mapped name.
    """
    not_identifier_char = re.compile(r'[^A-Za-z0-9_]')
    return not_identifier_char.sub('_', filename)
def guardstart(name: str) -> str:
    """
    Generate the opening of an include guard.

    :param name: Name the guard macro is derived from, via `c_fname`
                 and upper-casing.
    :return: The ``#ifndef`` / ``#define`` lines for the guard macro.
    """
    macro = c_fname(name).upper()
    # NOTE(review): template reproduced exactly as seen; confirm that
    # no blank lines inside it were lost.
    return mcgen('''
#ifndef %(name)s
#define %(name)s
''',
                 name=macro)
def guardend(name: str) -> str:
    """
    Generate the closing of an include guard.

    :param name: Name the guard macro is derived from, via `c_fname`
                 and upper-casing.
    :return: The ``#endif`` line, with the guard macro in a comment.
    """
    macro = c_fname(name).upper()
    # NOTE(review): template reproduced exactly as seen; confirm that
    # no blank lines inside it were lost.
    return mcgen('''
#endif /* %(name)s */
''',
                 name=macro)
def gen_ifcond(ifcond: Optional[Union[str, Dict[str, Any]]],
               cond_fmt: str, not_fmt: str,
               all_operator: str, any_operator: str) -> str:
    """
    Render a condition tree as a single expression string.

    A condition is either a string (a leaf) or a one-entry dict keyed
    by 'not' (one operand), 'all' or 'any' (a sequence of operands).

    :param ifcond: The condition; a false value yields ''.
    :param cond_fmt: ``%``-format applied to each leaf string.
    :param not_fmt: ``%``-format applied to a rendered 'not' operand.
    :param all_operator: Text joining the operands of 'all'.
    :param any_operator: Text joining the operands of 'any'.
    :return: The rendered expression.  Nested 'all' / 'any' groups are
             parenthesized, the outermost one is not.
    """

    def render(node: Union[str, Dict[str, Any]],
               parenthesize: bool) -> str:
        if isinstance(node, str):
            return cond_fmt % node
        assert isinstance(node, dict) and len(node) == 1
        if 'not' in node:
            return not_fmt % render(node['not'], True)
        if 'all' in node:
            joiner, operands = all_operator, node['all']
        else:
            joiner, operands = any_operator, node['any']
        text = joiner.join(render(operand, True) for operand in operands)
        return '(' + text + ')' if parenthesize else text

    return render(ifcond, False) if ifcond else ''
def cgen_ifcond(ifcond: Optional[Union[str, Dict[str, Any]]]) -> str:
    """
    Render ``ifcond`` with `gen_ifcond` using ``defined(...)`` leaves
    and the ``!``, ``&&`` and ``||`` operators.
    """
    return gen_ifcond(ifcond,
                      cond_fmt='defined(%s)',
                      not_fmt='!%s',
                      all_operator=' && ',
                      any_operator=' || ')
def docgen_ifcond(ifcond: Optional[Union[str, Dict[str, Any]]]) -> str:
    """
    Render ``ifcond`` with `gen_ifcond` using bare leaves and the
    words ``not``, ``and`` and ``or`` as operators.
    """
    # TODO Doc generated for conditions needs polish
    return gen_ifcond(ifcond,
                      cond_fmt='%s',
                      not_fmt='not %s',
                      all_operator=' and ',
                      any_operator=' or ')
def gen_if(cond: str) -> str:
    """
    Generate an ``#if`` line for ``cond``.

    :param cond: The condition text; a false value yields ''.
    :return: The ``#if`` line, or '' when there is no condition.
    """
    if cond:
        return mcgen('''
#if %(cond)s
''', cond=cond)
    return ''
def gen_endif(cond: str) -> str:
    """
    Generate an ``#endif`` line with ``cond`` in a trailing comment.

    :param cond: The condition text; a false value yields ''.
    :return: The ``#endif`` line, or '' when there is no condition.
    """
    if cond:
        return mcgen('''
#endif /* %(cond)s */
''', cond=cond)
    return ''
def must_match(pattern: str, string: str) -> Match[str]:
    """
    Match ``pattern`` at the start of ``string``; the match must succeed.

    :param pattern: The regular expression, as for ``re.match``.
    :param string: The text to match against.
    :return: The match object; failure to match trips an assertion
             instead of returning None.
    """
    found = re.match(pattern, string)
    assert found is not None
    return found