trivial modification on the utility module

1. add a macro-related getter interface; for now we haven't fully implemented a way to expand and evaluate macros at runtime. We only deal with macros that can be expanded and evaluated into essential constants that will be needed later. 2. rearrange utility functions in a different order 3. reimplement the get register API to make it more generally usable Signed-off-by: Gao Jiawei <gaojiawei@xiaomi.com>
2024-07-09 11:26:27 +08:00 · 2024-07-09 11:26:27 +08:00 · 8edb9283ba
commit 8edb9283ba
parent 76db3c8939
3 changed files with 386 additions and 77 deletions
--- a/tools/gdb/macros.py
+++ b/tools/gdb/macros.py
@ -0,0 +1,176 @@
+############################################################################
+# tools/gdb/macros.py
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.  The
+# ASF licenses this file to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance with the
+# License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+############################################################################
+
+# NOTE: GDB stores macro information based on the current stack frame's scope,
+# including the source file and line number. Therefore, there may be missing
+# macro definitions when you are at different stack frames.
+#
+# To resolve this issue, we need to retrieve all macro information from the ELF file
+# then parse and evaluate it by ourselves.
+#
+# There might be two ways to achieve this, one is to leverage the C preprocessor
+# to directly preprocess all the macros of interest into python constants
+# gcc -E -x c -P <file_with_macros> -I/path/to/nuttx/include
+#
+# While the other way is to leverage the dwarf info stored in the ELF file,
+# with -g3 switch, we have a `.debug_macro` section containing all the information
+# about the macros.
+#
+# Currently, we are using the second method.
+
+import os
+import re
+import subprocess
+import tempfile
+
+# Regular-expression fragments, one per C punctuator.  Characters that are
+# special in a regex are already backslash-escaped, so the entries can be
+# joined with "|" to build an alternation.  Longer punctuators are listed
+# before their own prefixes (e.g. "<<=" before "<<" before "<") so that the
+# alternation prefers the longest one.
+#
+# Raw strings are used so that sequences such as "\[" are not treated as
+# (invalid) Python string escapes; the resulting values are unchanged.
+PUNCTUATORS = [
+    r"\[", r"\]", r"\(", r"\)", r"\{", r"\}", r"\?", ";", ",", "~",
+    r"\.\.\.", r"\.",
+    r"\-\>", r"\-\-", r"\-\=", r"\-",
+    r"\+\+", r"\+\=", r"\+",
+    r"\*\=", r"\*",
+    r"\!\=", r"\!",
+    r"\&\&", r"\&\=", r"\&",
+    r"\/\=", r"\/",
+    r"\%\>", "%:%:", "%:", "%=", "%",
+    r"\^\=", r"\^",
+    r"\#\#", r"\#",
+    r"\:\>", r"\:",
+    r"\|\|", r"\|\=", r"\|",
+    "<<=", "<<", "<=", "<:", "<%", "<",
+    ">>=", ">>", ">=", ">",
+    r"\=\=", r"\=",
+]
+
+
+def parse_macro(line, macros, pattern):
+    """Record the macro described by one line of macro dump output.
+
+    ``pattern`` is a compiled regular expression with two capture groups:
+    group 1 is the macro name, group 2 its replacement text.  On a match
+    the entry is stored in ``macros`` (a dict that is updated in place).
+
+    Returns True if ``line`` matched ``pattern``, False otherwise.
+    """
+    matched = pattern.match(line)
+    if matched is None:
+        return False
+
+    token, replacement = matched.group(1, 2)
+
+    # FIXME: a redefinition/duplication of an already known name simply
+    # overwrites the previous value; nothing smarter is attempted for now.
+    #
+    # A macro without replacement text (e.g. an undefined one) is stored
+    # as "0" so that it evaluates to zero.
+    macros[token] = replacement or "0"
+    return True
+
+
+def fetch_macro_info(file):
+    """Collect the macros recorded in the debug info of an ELF file.
+
+    Runs ``readelf -wm`` on ``file`` and feeds every ``DW_MACRO_define`` /
+    ``DW_MACRO_undef`` line of its output to parse_macro().
+
+    Returns a dict mapping macro names to their replacement text.  An empty
+    dict is returned when readelf cannot be started or exits with a
+    non-zero status.
+
+    Raises FileNotFoundError if ``file`` is not an existing regular file.
+    """
+    if not os.path.isfile(file):
+        raise FileNotFoundError("No given ELF target found")
+
+    p = re.compile(r".*macro[ ]*:[ ]*([\S]+\(.*?\)|[\w]+)[ ]*(.*)")
+    macros = {}
+
+    # FIXME: the output is routed through a temporary file instead of a
+    # pipe because capturing subprocess output is broken on some GDB
+    # distributions :(, no better solution has been found so far.
+    with tempfile.NamedTemporaryFile(delete=False) as f1:
+        output = f1.name
+        try:
+            # An argument list (no shell) keeps paths containing spaces or
+            # shell metacharacters intact.
+            process = subprocess.Popen(
+                ["readelf", "-wm", file],
+                stdout=f1,
+                stderr=subprocess.STDOUT)
+            process.communicate()
+            errcode = process.returncode
+        except OSError:
+            # readelf is missing or not executable: behave like a failed run
+            errcode = -1
+
+    try:
+        if errcode != 0:
+            return {}
+
+        with open(output, 'rb') as f2:
+            for raw in f2:
+                # Do not let a stray non UTF-8 byte abort the whole parse
+                line = raw.decode("utf-8", errors="replace")
+                if not line.startswith((" DW_MACRO_define",
+                                        " DW_MACRO_undef")):
+                    continue
+
+                if not parse_macro(line, macros, p):
+                    print(f"Failed to parse {line}")
+    finally:
+        # The file was created with delete=False, so remove it ourselves
+        os.remove(output)
+
+    return macros
+
+
+def split_tokens(expr):
+    """Split ``expr`` into a list of tokens.
+
+    The expression is cut at every punctuator listed in PUNCTUATORS; the
+    punctuators themselves are kept as tokens.  Surrounding whitespace is
+    stripped from each piece and empty pieces are dropped.
+    """
+    splitter = "(" + "|".join(PUNCTUATORS) + ")"
+
+    tokens = []
+    for piece in re.split(splitter, expr):
+        piece = piece.strip()
+        if piece != "":
+            tokens.append(piece)
+    return tokens
+
+
+def do_expand(expr, macro_map, _expanding=None):
+    """Recursively replace the macro names found in ``expr``.
+
+    ``expr`` is tokenized with split_tokens(); every token that is a key
+    of ``macro_map`` is replaced by the expansion of its replacement text,
+    all other tokens are kept unchanged.
+
+    ``_expanding`` is internal: it holds the names currently being
+    expanded.  A macro that shows up again inside its own expansion
+    (``#define A A`` or ``A -> B -> A``) is left as it is instead of being
+    expanded forever, which is also how the C preprocessor treats it.
+
+    Always returns a list of token strings.  A bare punctuator needs no
+    special case: it is a single token that is simply kept.
+    """
+    if _expanding is None:
+        _expanding = frozenset()
+
+    res = []
+
+    for t in split_tokens(expr):
+        if t not in macro_map or t in _expanding:
+            res.append(t)
+            continue
+        res += do_expand(macro_map[t], macro_map, _expanding | {t})
+
+    return res
+
+
+# NOTE: Implementing a fully functional parser which can
+# preprocess all the C macros according to the ISO 9899 standard
+# may be overkill; what we really care about are those
+# macros that can be evaluated to a constant value.
+#
+# #define A (B + C + D)
+# #define B 1
+# #define C 2
+# #define D 3
+# invoking try_expand('A', macro_map) will give you "(1+2+3)"
+# (whitespace around tokens is dropped while expanding)
+#
+# However,
+# #define SUM(B,C,D) (B + C + D)
+# invoking try_expand('SUM(1,2,3)', macro_map) will give you "SUM(1,2,3)"
+#
+# We have not implemented this feature as we have not found a practical
+# use case for it in our GDB plugin.
+#
+# However, you can switch to the correct stack frame that has this macro defined
+# and let GDB expand and evaluate it for you if you really want to evaluate some very
+# complex macros.
+
+
+def try_expand(expr, macro):
+    """Expand ``expr`` with the macro map ``macro`` and return a string.
+
+    The tokens produced by do_expand() are concatenated without any
+    separator.
+    """
+    return "".join(do_expand(expr, macro))
--- a/tools/gdb/thread.py
+++ b/tools/gdb/thread.py
@ -118,24 +118,6 @@ class Nxsetregs(gdb.Command):
            i += 1


-def get_pc_value(tcb):
-    arch = gdb.selected_frame().architecture()
-    tcbinfo = gdb.parse_and_eval("g_tcbinfo")
-
-    i = 0
-    for reg in arch.registers():
-        if reg.name == "pc" or reg.name == "rip" or reg.name == "eip":
-            break
-        i += 1
-
-    regs = tcb["xcp"]["regs"].cast(gdb.lookup_type("char").pointer())
-    value = gdb.Value(regs + tcbinfo["reg_off"]["p"][i]).cast(
-        gdb.lookup_type("uintptr_t").pointer()
-    )[0]
-
-    return int(value)
-
-
 class Nxinfothreads(gdb.Command):
    def __init__(self):
        super(Nxinfothreads, self).__init__("info threads", gdb.COMMAND_USER)
@ -162,10 +144,10 @@ class Nxinfothreads(gdb.Command):

            if pidhash[i]["task_state"] == gdb.parse_and_eval("TSTATE_TASK_RUNNING"):
                index = "*%s" % i
-                pc = int(gdb.parse_and_eval("$pc"))
+                pc = utils.get_register_byname(utils.get_arch_pc_name(), tcb=None)
            else:
                index = " %s" % i
-                pc = get_pc_value(pidhash[i])
+                pc = utils.get_register_byname(utils.get_arch_pc_name(), tcb=pidhash[i])

            thread = "Thread 0x%x" % pidhash[i]

--- a/tools/gdb/utils.py
+++ b/tools/gdb/utils.py
@ -20,8 +20,10 @@
 #
 ############################################################################

-import gdb
 import re
+import gdb
+from macros import fetch_macro_info, try_expand
+

 class CachedType:
    """Cache a type object, so that we can reconnect to the new_objfile event"""
@ -38,7 +40,8 @@ class CachedType:
        if self._type is None:
            self._type = gdb.lookup_type(self._name)
            if self._type is None:
-                raise gdb.GdbError("cannot resolve type '{0}'".format(self._name))
+                raise gdb.GdbError(
+                    "cannot resolve type '{0}'".format(self._name))
            if hasattr(gdb, "events") and hasattr(gdb.events, "new_objfile"):
                gdb.events.new_objfile.connect(self._new_objfile_handler)
        return self._type
@ -47,6 +50,44 @@ class CachedType:
 long_type = CachedType("long")


+class MacroCtx:
+    """
+    This is a singleton class which only initializes once to
+    cache a context of macro definitions which can be queried later.
+    TODO: we only deal with a single ELF at the moment for simplicity.
+    If you load more object files while debugging, only the first one
+    that gets loaded will be used to retrieve macro information.
+    """
+
+    def __new__(cls, *args, **kwargs):
+        if not hasattr(cls, 'instance'):
+            cls.instance = super(MacroCtx, cls).__new__(cls)
+        return cls.instance
+
+    def __init__(self, filename):
+        # Python calls __init__ on every MacroCtx(...) even though __new__
+        # hands back the shared instance; without this guard each call
+        # would run fetch_macro_info() again and replace the cached map.
+        if getattr(self, "_initialized", False):
+            return
+
+        self._macro_map = {}
+        self._file = filename
+
+        self._macro_map = fetch_macro_info(filename)
+
+        # Only set once the macro map has been fetched without raising,
+        # so a failed first attempt can be retried.
+        self._initialized = True
+
+    @property
+    def macro_map(self):
+        """Dict of macro name -> replacement text fetched from the file"""
+        return self._macro_map
+
+    @property
+    def objfile(self):
+        """File name the macro information was fetched from"""
+        return self._file
+
+
+# Build the module-wide macro context from the first object file GDB
+# currently knows about.  This runs when the module is imported, so the
+# import fails with a GdbError if no object file has been loaded yet.
+if len(gdb.objfiles()) > 0:
+    macroctx = MacroCtx(gdb.objfiles()[0].filename)
+else:
+    raise gdb.GdbError("An executable file must be provided")
+
+
+# Common Helper Functions
+
+
 def get_long_type():
    """Return the cached long type object"""
    global long_type
@ -76,13 +117,90 @@ class ContainerOf(gdb.Function):

    def invoke(self, ptr, typename, elementname):
        return container_of(
-            ptr, gdb.lookup_type(typename.string()).pointer(), elementname.string()
+            ptr, gdb.lookup_type(
+                typename.string()).pointer(), elementname.string()
        )


 ContainerOf()


+def gdb_eval_or_none(expresssion):
+    """Return the GDB value of an expression, or None if GDB rejects it"""
+    try:
+        value = gdb.parse_and_eval(expresssion)
+    except gdb.error:
+        value = None
+    return value
+
+
+def get_symbol_value(name):
+    """Return the value of a symbol, e.g. a variable or a macro.
+
+    GDB is first asked to store ``name`` in the convenience variable
+    ``$_<name>``.  If GDB refuses, ``name`` is expanded with the cached
+    macro map instead.  Whichever expression results is then evaluated;
+    None is returned if that evaluation fails.
+    """
+    try:
+        gdb.execute("set $_%s = %s" % (name, name))
+    except gdb.error:
+        # GDB does not know the name in the current scope: fall back to
+        # the macro information fetched from the ELF file.
+        return gdb_eval_or_none(try_expand(name, macroctx.macro_map))
+
+    return gdb_eval_or_none("$_%s" % (name))
+
+
+def hexdump(address, size):
+    """Write a hex and ASCII dump of inferior memory with gdb.write().
+
+    Reads ``size`` bytes starting at ``address`` from the first inferior
+    and prints them 16 bytes per row: the row address, the bytes in hex
+    and the printable ASCII characters ("." for everything else).
+    """
+    data = gdb.inferiors()[0].read_memory(address, size).tobytes()
+
+    for offset in range(0, len(data), 16):
+        row = data[offset:offset + 16]
+        hex_values = " ".join(f"{byte:02x}" for byte in row)
+        ascii_values = "".join(
+            chr(byte) if 32 <= byte <= 126 else "." for byte in row)
+
+        gdb.write(f"{offset + address:08x}  ")
+        # 16 two-digit values plus 15 separators are 47 columns wide
+        gdb.write(f"{hex_values:<47}")
+        gdb.write(f"  {ascii_values} \n")
+
+
+def is_decimal(s):
+    """Return True if ``s`` consists of one or more digits and nothing else"""
+    return bool(re.fullmatch(r"\d+", s))
+
+
+def is_hexadecimal(s):
+    """Return True if ``s`` is a run of hex digits, optionally 0x-prefixed"""
+    return bool(re.fullmatch(r"0[xX][0-9a-fA-F]+|[0-9a-fA-F]+", s))
+
+
+class Hexdump(gdb.Command):
+    """hexdump address/symbol <size>"""
+
+    def __init__(self):
+        super(Hexdump, self).__init__("hexdump", gdb.COMMAND_USER)
+
+    @staticmethod
+    def _parse_number(text):
+        """Convert a number given on the command line to an int.
+
+        Prefixed literals ("0x10", "0o17", ...) are handled by int(x, 0).
+        What that rejects is read as decimal if it only consists of
+        digits (e.g. "010") and as hexadecimal otherwise (e.g. "ff").
+        Raises ValueError if ``text`` is not a number at all.
+        """
+        try:
+            return int(text, 0)
+        except ValueError:
+            return int(text, 10 if is_decimal(text) else 16)
+
+    def invoke(self, args, from_tty):
+        """Dump memory at a numeric address, or the storage of a symbol.
+
+        A numeric first argument is taken as the address and requires a
+        second argument giving the size.  Anything else is evaluated by
+        GDB and its own address and size are used.
+        """
+        # split() without a separator copes with repeated/leading spaces
+        argv = args.split()
+        if not argv:
+            gdb.write("Usage: hexdump address/symbol <size>\n")
+            return
+
+        # NOTE: a symbol whose name only consists of hex digits (e.g.
+        # "abc") is taken as an address here.
+        if is_decimal(argv[0]) or is_hexadecimal(argv[0]):
+            if len(argv) < 2:
+                # A raw address carries no size information
+                gdb.write("Usage: hexdump address/symbol <size>\n")
+                return
+            address = self._parse_number(argv[0])
+            size = self._parse_number(argv[1])
+        else:
+            var = gdb.parse_and_eval(f'{argv[0]}')
+            address = int(var.address)
+            size = int(var.type.sizeof)
+            gdb.write(f"{argv[0]} {hex(address)} {int(size)}\n")
+
+        hexdump(address, size)
+
+
+Hexdump()
+
+
+# Machine Specific Helper Functions
+
+
 BIG_ENDIAN = 0
 LITTLE_ENDIAN = 1
 target_endianness = None
@ -112,7 +230,7 @@ def read_memoryview(inf, start, length):

 def read_u16(buffer, offset):
    """Read a 16-bit unsigned integer from a buffer"""
-    buffer_val = buffer[offset : offset + 2]
+    buffer_val = buffer[offset: offset + 2]
    value = [0, 0]

    if type(buffer_val[0]) is str:
@ -155,23 +273,39 @@ def read_ulong(buffer, offset):
 target_arch = None


-def is_target_arch(arch):
-    """Return True if the target architecture is ARCH"""
+def is_target_arch(arch, exact=False):
+    """
+    For a non-exact match, this function will
+    return True if the target architecture contains
+    keywords of an ARCH family. For example, x86 is
+    contained in i386:x86-64.
+    For an exact match, this function will return True if
+    the target architecture is exactly the same as ARCH.
+    """
    if hasattr(gdb.Frame, "architecture"):
-        return arch in gdb.newest_frame().architecture().name()
+        archname = gdb.newest_frame().architecture().name()
+
+        return arch in archname \
+            if not exact else arch == archname
    else:
        global target_arch
        if target_arch is None:
            target_arch = gdb.execute("show architecture", to_string=True)
-        return arch in target_arch
+            # The quotes are kept outside of the capture groups so that
+            # the cached name can be compared with ARCH for equality.
+            pattern = r'set to "(.*?)"\s*(\(currently "(.*?)"\))?'
+            match = re.search(pattern, target_arch)
+
+            # If the output has an unexpected format, the raw text stays
+            # cached, which still allows the non-exact (substring) match.
+            if match is not None:
+                candidate = match.group(1)
+
+                if candidate != "auto":
+                    target_arch = candidate
+                elif match.group(3) is not None:
+                    # "auto": the real name is in the "currently" part
+                    target_arch = match.group(3)
+
+        return arch in target_arch \
+            if not exact else arch == target_arch


-def gdb_eval_or_none(expresssion):
-    """Evaluate an expression and return None if it fails"""
-    try:
-        return gdb.parse_and_eval(expresssion)
-    except gdb.error:
-        return None
+# Kernel Specific Helper Functions


 def is_target_smp():
@ -183,55 +317,72 @@ def is_target_smp():
        return False


-def get_symbol_value(name):
-    """Return the value of a symbol value etc: Variable, Marco"""
+# FIXME: support RISC-V/X86/ARM64 etc.
+def in_interrupt_context(cpuid=0):
+    """Tell whether the target is in an interrupt context.
+
+    On targets whose architecture name contains "arm", the low four bits
+    of the xpsr register of the selected frame are returned (an int).
+    On all other targets ``not g_current_regs[cpuid]`` is returned.
+
+    NOTE(review): gdb_eval_or_none() returns None when g_current_regs
+    cannot be evaluated, in which case the indexing below raises a
+    TypeError -- confirm whether that case has to be handled.
+    """
+    frame = gdb.selected_frame()

-    gdb.execute("set $_%s = %s" % (name, name))
-    return gdb.parse_and_eval("$_%s" % name)
-
-def hexdump(address, size):
-    inf = gdb.inferiors()[0]
-    mem = inf.read_memory(address, size)
-    bytes = mem.tobytes()
-    for i in range(0, len(bytes), 16):
-        chunk = bytes[i:i+16]
-        gdb.write(f"{i + address:08x}  ")
-        hex_values = " ".join(f"{byte:02x}" for byte in chunk)
-        hex_display = f"{hex_values:<47}"
-        gdb.write(hex_display)
-        ascii_values = "".join(chr(byte) if 32 <= byte <= 126 else "." for byte in chunk)
-        gdb.write(f"  {ascii_values} \n")
+    if is_target_arch("arm"):
+        xpsr = int(frame.read_register('xpsr'))
+        return xpsr & 0xf
+    else:
+        # TODO: figure out a more proper way to detect if
+        # we are in an interrupt context
+        g_current_regs = gdb_eval_or_none("g_current_regs")
+        return not g_current_regs[cpuid]


-def is_decimal(s):
-    return re.fullmatch(r"\d+", s) is not None
+def get_arch_sp_name():
+    """Return the name of the stack pointer register of the target.
+
+    Raises gdb.GdbError for architectures that are not handled yet.
+    """
+    # (exact architecture name, stack pointer register), tried in order
+    known = (
+        ("arm", "sp"),
+        ("i386", "esp"),
+        ("i386:x86-64", "rsp"),
+    )
+
+    for archname, regname in known:
+        if is_target_arch(archname, exact=True):
+            return regname
+
+    raise gdb.GdbError("Not implemented yet")

-def is_hexadecimal(s):
-    return re.fullmatch(r"0[xX][0-9a-fA-F]+|[0-9a-fA-F]+", s) is not None

-class Hexdump(gdb.Command):
-    """hexdump address/symbol <size>"""
+def get_arch_pc_name():
+    """Return the name of the program counter register of the target.
+
+    Raises gdb.GdbError for architectures that are not handled yet.
+    """
+    # (exact architecture name, program counter register), tried in order
+    known = (
+        ("arm", "pc"),
+        ("i386", "eip"),
+        ("i386:x86-64", "rip"),
+    )
+
+    for archname, regname in known:
+        if is_target_arch(archname, exact=True):
+            return regname
+
+    raise gdb.GdbError("Not implemented yet")

-    def __init__(self):
-        super(Hexdump, self).__init__("hexdump", gdb.COMMAND_USER)
-    def invoke(self, args, from_tty):
-        argv = args.split(" ")
-        argc = len(argv)
-        address = 0
-        size = 0
-        if (argv[0] == ""):
-            gdb.write("Usage: hexdump address/symbol <size>\n")
-            return

-        if is_decimal(argv[0]) or is_hexadecimal(argv[0]):
-            address = int(argv[0], 0)
-            size = int(argv[1], 0)
-        else:
-            var = gdb.parse_and_eval(f'{argv[0]}')
-            address = int(var.address)
-            size = int(var.type.sizeof)
-            gdb.write(f"{argv[0]} {hex(address)} {int(size)}\n")
+def get_register_byname(regname, tcb=None):
+    """Return the value of the register ``regname`` as an int.
+
+    Without ``tcb`` the register is read from the selected frame.  With a
+    ``tcb`` the value is read from the register area referenced by
+    ``tcb["xcp"]["regs"]``, at the offset that ``g_tcbinfo`` records for
+    that register.
+
+    Raises gdb.GdbError if ``tcb`` is given and the architecture of the
+    selected frame has no register called ``regname``.
+    """
+    frame = gdb.selected_frame()

-        hexdump(address, size)
+    # If no tcb is given then we can directly use the register from
+    # the frame cached by GDB
+    if not tcb:
+        return int(frame.read_register(regname))

-Hexdump()
+    # Ok, let's take it from the context in the given tcb
+    arch = frame.architecture()
+    tcbinfo = gdb.parse_and_eval("g_tcbinfo")
+
+    # The position of the register in arch.registers() is used as the
+    # index into the reg_off table.
+    for i, reg in enumerate(arch.registers()):
+        if reg.name == regname:
+            break
+    else:
+        # Do not silently read with an index past the last register
+        raise gdb.GdbError("Register '%s' not found" % regname)
+
+    regs = tcb["xcp"]["regs"].cast(gdb.lookup_type("char").pointer())
+    value = gdb.Value(regs + tcbinfo["reg_off"]["p"][i]).cast(
+        gdb.lookup_type("uintptr_t").pointer()
+    )[0]
+
+    return int(value)
+
+
+def get_tcbs():
+    """Return the non-null entries of ``g_pidhash`` as a list.
+
+    Both ``g_pidhash`` and ``g_npidhash`` are evaluated on every call,
+    since tasks may have been created or deleted at runtime.
+    """
+    pidhash = gdb.parse_and_eval("g_pidhash")
+    npidhash = gdb.parse_and_eval("g_npidhash")
+
+    tcbs = []
+    for i in range(0, npidhash):
+        if pidhash[i]:
+            tcbs.append(pidhash[i])
+    return tcbs