1
0
Fork 0
forked from nuttx/nuttx-update

trivial modification on the utility module

1. add macro lookup interfaces; for now we haven't fully implemented
the way to expand and evaluate macros at runtime. We only deal with macros that can be expanded and evaluated into the essential constants that will be needed later.
2. rearrange utility functions in a different order
3. reimplement the get register API to make it more generally usable

Signed-off-by: Gao Jiawei <gaojiawei@xiaomi.com>
This commit is contained in:
Gao Jiawei 2024-07-09 11:26:27 +08:00 committed by Xiang Xiao
parent 76db3c8939
commit 8edb9283ba
3 changed files with 386 additions and 77 deletions

176
tools/gdb/macros.py Normal file
View file

@ -0,0 +1,176 @@
############################################################################
# tools/gdb/macros.py
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership. The
# ASF licenses this file to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance with the
# License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
############################################################################
# NOTE: GDB stores macro information based on the current stack frame's scope,
# including the source file and line number. Therefore, there may be missing
# macro definitions when you are at different stack frames.
#
# To resolve this issue, we need to retrieve all macro information from the ELF file
# then parse and evaluate it by ourselves.
#
# There might be two ways to achieve this, one is to leverage the C preprocessor
# to directly preprocess all the macros we are interested in into Python constants
# gcc -E -x c -P <file_with_macros> -I/path/to/nuttx/include
#
# While the other way is to leverage the dwarf info stored in the ELF file,
# with -g3 switch, we have a `.debug_macro` section containing all the information
# about the macros.
#
# Currently, we use the second method.
import os
import re
import subprocess
import tempfile
# Regular-expression fragments, one per C punctuator, that are OR-ed together
# to split a macro body into tokens.
#
# Raw strings keep every backslash literal instead of relying on Python
# passing unknown escapes such as "\[" through unchanged (which triggers an
# invalid-escape warning on recent interpreters).  Order matters: within each
# group the longer punctuator comes first so the alternation prefers "<<="
# over "<<" over "<".
PUNCTUATORS = [
    r"\[", r"\]", r"\(", r"\)", r"\{", r"\}", r"\?", ";", ",", "~",
    r"\.\.\.", r"\.",
    r"\-\>", r"\-\-", r"\-\=", r"\-",
    r"\+\+", r"\+\=", r"\+",
    r"\*\=", r"\*",
    r"\!\=", r"\!",
    r"\&\&", r"\&\=", r"\&",
    r"\/\=", r"\/",
    r"\%\>", "%:%:", "%:", "%=", "%",
    r"\^\=", r"\^",
    r"\#\#", r"\#",
    r"\:\>", r"\:",
    r"\|\|", r"\|\=", r"\|",
    "<<=", "<<", "<=", "<:", "<%", "<",
    ">>=", ">>", ">=", ">",
    r"\=\=", r"\=",
]
def parse_macro(line, macros, pattern):
    """Record the macro described by one line of text into *macros*.

    *pattern* is a compiled regular expression whose first group captures
    the macro name (the token) and whose second group captures its
    replacement text.

    Returns True when *line* matched and *macros* was updated, False when
    the line did not match the pattern.
    """
    matched = pattern.match(line)
    if matched is None:
        return False

    name, replacement = matched.group(1, 2)

    # FIXME: what should we do on a redefinition/duplication?  For now the
    # most recent definition simply overwrites the previous one.
    #
    # A macro without replacement text is stored as "0".
    macros[name] = replacement or "0"
    return True
def fetch_macro_info(file):
    """Collect the macros recorded in the debug information of an ELF file.

    Runs ``readelf -wm`` on *file* and feeds every output line that starts
    with `` DW_MACRO_define`` or `` DW_MACRO_undef`` to parse_macro().

    Returns a dict mapping macro name to replacement text.  The dict is
    empty when readelf cannot be started or exits with a non-zero status.

    Raises FileNotFoundError when *file* is not an existing regular file.
    """
    if not os.path.isfile(file):
        raise FileNotFoundError("No given ELF target found")

    pattern = re.compile(r".*macro[ ]*:[ ]*([\S]+\(.*?\)|[\w]+)[ ]*(.*)")
    wanted = (" DW_MACRO_define", " DW_MACRO_undef")
    macros = {}

    # NOTE(review): the output is sent to a temporary file instead of a pipe.
    # The original author noted that subprocess handling is broken on some
    # GDB distributions; presumably that concerns pipe capture -- confirm
    # before switching to subprocess.run(capture_output=True).
    #
    # The temporary file is removed automatically when the `with` exits.
    with tempfile.NamedTemporaryFile() as dump:
        try:
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact.
            process = subprocess.Popen(
                ["readelf", "-wm", file],
                stdout=dump,
                stderr=subprocess.STDOUT)
            process.communicate()
        except OSError:
            # readelf is missing or not executable: same outcome as the
            # shell reporting a non-zero exit status.
            return {}

        if process.returncode != 0:
            return {}

        # The child wrote through the shared descriptor; rewind to read it.
        dump.seek(0)
        for raw in dump:
            # Stray non-UTF-8 bytes must not abort the whole parse.
            line = raw.decode("utf-8", errors="replace")
            if not line.startswith(wanted):
                continue
            if not parse_macro(line, macros, pattern):
                print(f"Failed to parse {line}")

    return macros
def split_tokens(expr):
    """Break *expr* into a list of tokens, splitting at every punctuator.

    The punctuators themselves are kept as tokens.  Whitespace around each
    piece is trimmed and pieces that end up empty are dropped.
    """
    splitter = "(" + "|".join(PUNCTUATORS) + ")"
    tokens = []
    for piece in re.split(splitter, expr):
        piece = piece.strip()
        if piece:
            tokens.append(piece)
    return tokens
def do_expand(expr, macro_map, _active=frozenset()):
    """Recursively replace macro names found in *expr* by their definitions.

    Args:
        expr: the text to expand.
        macro_map: dict mapping macro name to replacement text.
        _active: internal; names of the macros currently being expanded.
            Callers should leave it at its default.

    Returns:
        A list of token strings.  Tokens that are not keys of *macro_map*
        are passed through unchanged.
    """
    if expr in PUNCTUATORS:
        # Always hand back a list so every path has the same return type.
        return [expr]

    expanded = []
    for token in split_tokens(expr):
        # As in the C preprocessor, a macro is not expanded again inside its
        # own expansion.  Without this guard `#define foo foo` (or a cycle
        # such as A -> B -> A) would recurse until RecursionError.
        if token in macro_map and token not in _active:
            expanded.extend(
                do_expand(macro_map[token], macro_map, _active | {token}))
        else:
            expanded.append(token)
    return expanded
# NOTE: Implementing a fully functional parser that can
# preprocess all C macros according to the ISO 9899 standard
# may be overkill; what we really care about are those
# macros that can be evaluated to a constant value.
#
# #define A (B + C + D)
# #define B 1
# #define C 2
# #define D 3
# invoking try_expand('A', macro_map) will give you "(1+2+3)"
#
# However,
# #define SUM(B,C,D) (B + C + D)
# invoking try_expand('SUM(1,2,3)', macro_map) will give you "SUM(1,2,3)"
#
# We have not implemented this feature as we have not found a practical
# use case for it in our GDB plugin.
#
# However, you can switch to the correct stack frame that has this macro defined
# and let GDB expand and evaluate it for you if you really want to evaluate some very
# complex macros.
def try_expand(expr, macro):
    """Expand *expr* against the macro table *macro* and return one string.

    The tokens produced by do_expand() are concatenated with no separator.
    """
    return "".join(do_expand(expr, macro))

View file

@ -118,24 +118,6 @@ class Nxsetregs(gdb.Command):
i += 1
def get_pc_value(tcb):
    """Read the program counter stored in the register context of *tcb*.

    The position of the first register named "pc", "rip" or "eip" in the
    selected frame's architecture is used to index g_tcbinfo's reg_off
    table; the value found at that offset inside tcb["xcp"]["regs"] is
    returned as an int.
    """
    pc_names = ("pc", "rip", "eip")
    arch = gdb.selected_frame().architecture()
    tcbinfo = gdb.parse_and_eval("g_tcbinfo")

    index = 0
    for reg in arch.registers():
        if reg.name in pc_names:
            break
        index += 1

    # Byte-wise pointer so the offset below is applied in bytes.
    regs = tcb["xcp"]["regs"].cast(gdb.lookup_type("char").pointer())
    slot = gdb.Value(regs + tcbinfo["reg_off"]["p"][index]).cast(
        gdb.lookup_type("uintptr_t").pointer()
    )
    return int(slot[0])
class Nxinfothreads(gdb.Command):
def __init__(self):
super(Nxinfothreads, self).__init__("info threads", gdb.COMMAND_USER)
@ -162,10 +144,10 @@ class Nxinfothreads(gdb.Command):
if pidhash[i]["task_state"] == gdb.parse_and_eval("TSTATE_TASK_RUNNING"):
index = "*%s" % i
pc = int(gdb.parse_and_eval("$pc"))
pc = utils.get_register_byname(utils.get_arch_pc_name(), tcb=None)
else:
index = " %s" % i
pc = get_pc_value(pidhash[i])
pc = utils.get_register_byname(utils.get_arch_pc_name(), tcb=pidhash[i])
thread = "Thread 0x%x" % pidhash[i]

View file

@ -20,8 +20,10 @@
#
############################################################################
import gdb
import re
import gdb
from macros import fetch_macro_info, try_expand
class CachedType:
"""Cache a type object, so that we can reconnect to the new_objfile event"""
@ -38,7 +40,8 @@ class CachedType:
if self._type is None:
self._type = gdb.lookup_type(self._name)
if self._type is None:
raise gdb.GdbError("cannot resolve type '{0}'".format(self._name))
raise gdb.GdbError(
"cannot resolve type '{0}'".format(self._name))
if hasattr(gdb, "events") and hasattr(gdb.events, "new_objfile"):
gdb.events.new_objfile.connect(self._new_objfile_handler)
return self._type
@ -47,6 +50,44 @@ class CachedType:
long_type = CachedType("long")
class MacroCtx:
    """
    Singleton that is initialized only once and caches the macro
    definitions of one ELF file so that they can be queried later.

    TODO: only a single ELF is handled at the moment for simplicity.
    If more object files are loaded while debugging, only the first one
    used to construct this class provides macro information.
    """

    def __new__(cls, *args, **kwargs):
        if not hasattr(cls, 'instance'):
            cls.instance = super(MacroCtx, cls).__new__(cls)
        return cls.instance

    def __init__(self, filename):
        # Python calls __init__ on every MacroCtx(...) expression even though
        # __new__ hands back the shared instance.  Return early once the
        # cache is populated so readelf is not re-run and the first file's
        # macros are not overwritten.
        if getattr(self, "_initialized", False):
            return

        self._macro_map = {}
        self._file = filename
        self._macro_map = fetch_macro_info(filename)
        self._initialized = True

    @property
    def macro_map(self):
        """Dict mapping macro name to replacement text."""
        return self._macro_map

    @property
    def objfile(self):
        """File name the macro information was read from."""
        return self._file
# The macro context is built from the first object file known to GDB;
# loading this module without any object file is an error.
if not gdb.objfiles():
    raise gdb.GdbError("An executable file must be provided")

macroctx = MacroCtx(gdb.objfiles()[0].filename)
# Common Helper Functions
def get_long_type():
"""Return the cached long type object"""
global long_type
@ -76,13 +117,90 @@ class ContainerOf(gdb.Function):
def invoke(self, ptr, typename, elementname):
return container_of(
ptr, gdb.lookup_type(typename.string()).pointer(), elementname.string()
ptr, gdb.lookup_type(
typename.string()).pointer(), elementname.string()
)
ContainerOf()
def gdb_eval_or_none(expresssion):
    """Evaluate *expresssion* with GDB; return None when GDB reports an error."""
    try:
        value = gdb.parse_and_eval(expresssion)
    except gdb.error:
        return None
    else:
        return value
def get_symbol_value(name):
    """Return the value of a symbol such as a variable or a macro.

    GDB is first asked to assign *name* to the convenience variable
    ``$_<name>``.  When that fails, *name* is expanded through the cached
    macro table instead.  Either way the resulting expression is evaluated
    and None is returned if that evaluation fails.
    """
    convenience = "$_%s" % (name)
    try:
        gdb.execute("set %s = %s" % (convenience, name))
    except gdb.error:
        target = try_expand(name, macroctx.macro_map)
    else:
        target = convenience
    return gdb_eval_or_none(target)
def hexdump(address, size):
    """Write a hex and ASCII dump of *size* bytes starting at *address*.

    Memory is read from the first inferior and emitted through gdb.write(),
    sixteen bytes per row.
    """
    memory = gdb.inferiors()[0].read_memory(address, size)
    data = memory.tobytes()
    for offset in range(0, len(data), 16):
        row = data[offset:offset + 16]
        hex_column = " ".join(f"{value:02x}" for value in row)
        # Bytes outside the printable ASCII range are shown as ".".
        printable = "".join(
            chr(value) if 32 <= value <= 126 else "." for value in row
        )
        gdb.write(f"{offset + address:08x} ")
        gdb.write(f"{hex_column:<47}")
        gdb.write(f" {printable} \n")
def is_decimal(s):
    """Return True when *s* consists solely of one or more digits."""
    return bool(re.fullmatch(r"\d+", s))
def is_hexadecimal(s):
    """Return True when *s* is a run of hex digits, optionally 0x-prefixed."""
    return bool(re.fullmatch(r"0[xX][0-9a-fA-F]+|[0-9a-fA-F]+", s))
class Hexdump(gdb.Command):
    """hexdump address/symbol <size>"""

    def __init__(self):
        super(Hexdump, self).__init__("hexdump", gdb.COMMAND_USER)

    def invoke(self, args, from_tty):
        """Dump memory at a numeric address (size required) or at a symbol.

        For a symbol the address and size are taken from the evaluated
        value itself.  The usage line is printed when the arguments are
        missing or the size is not a number.
        """
        usage = "Usage: hexdump address/symbol <size>\n"

        # split() without a separator ignores repeated or leading blanks.
        argv = args.split()
        if not argv:
            gdb.write(usage)
            return

        target = argv[0]
        if is_decimal(target) or is_hexadecimal(target):
            if len(argv) < 2:
                # A raw address carries no size information.
                gdb.write(usage)
                return

            # int(x, 0) rejects hex without a "0x" prefix ("ff") and
            # decimals with leading zeros ("010"), both of which the
            # predicates above accept, so pick the base explicitly.
            # Decimal wins for ambiguous input such as "10".
            if is_decimal(target):
                address = int(target, 10)
            else:
                address = int(target, 16)

            try:
                size = int(argv[1], 0)
            except ValueError:
                gdb.write(usage)
                return
        else:
            var = gdb.parse_and_eval(f'{target}')
            address = int(var.address)
            size = int(var.type.sizeof)
            gdb.write(f"{target} {hex(address)} {int(size)}\n")

        hexdump(address, size)


# Register the command with GDB.
Hexdump()
# Machine Specific Helper Functions
BIG_ENDIAN = 0
LITTLE_ENDIAN = 1
target_endianness = None
@ -112,7 +230,7 @@ def read_memoryview(inf, start, length):
def read_u16(buffer, offset):
"""Read a 16-bit unsigned integer from a buffer"""
buffer_val = buffer[offset : offset + 2]
buffer_val = buffer[offset: offset + 2]
value = [0, 0]
if type(buffer_val[0]) is str:
@ -155,23 +273,39 @@ def read_ulong(buffer, offset):
target_arch = None
def is_target_arch(arch):
"""Return True if the target architecture is ARCH"""
def is_target_arch(arch, exact=False):
"""
For non exactly match, this function will
return True if the target architecture contains
keywords of an ARCH family. For example, x86 is
contained in i386:x86_64.
For exact match, this function will return True if
the target architecture is exactly the same as ARCH.
"""
if hasattr(gdb.Frame, "architecture"):
return arch in gdb.newest_frame().architecture().name()
archname = gdb.newest_frame().architecture().name()
return arch in archname \
if not exact else arch == archname
else:
global target_arch
if target_arch is None:
target_arch = gdb.execute("show architecture", to_string=True)
return arch in target_arch
pattern = r'set to "(.*?)"\s*(\(currently (".*")\))?'
match = re.search(pattern, target_arch)
candidate = match.group(1)
if candidate == "auto":
target_arch = match.group(3)
else:
target_arch = candidate
return arch in target_arch \
if not exact else arch == target_arch
def gdb_eval_or_none(expresssion):
"""Evaluate an expression and return None if it fails"""
try:
return gdb.parse_and_eval(expresssion)
except gdb.error:
return None
# Kernel Specific Helper Functions
def is_target_smp():
@ -183,55 +317,72 @@ def is_target_smp():
return False
def get_symbol_value(name):
"""Return the value of a symbol value etc: Variable, Marco"""
# FIXME: support RISC-V/X86/ARM64 etc.
def in_interrupt_context(cpuid=0):
frame = gdb.selected_frame()
gdb.execute("set $_%s = %s" % (name, name))
return gdb.parse_and_eval("$_%s" % name)
def hexdump(address, size):
inf = gdb.inferiors()[0]
mem = inf.read_memory(address, size)
bytes = mem.tobytes()
for i in range(0, len(bytes), 16):
chunk = bytes[i:i+16]
gdb.write(f"{i + address:08x} ")
hex_values = " ".join(f"{byte:02x}" for byte in chunk)
hex_display = f"{hex_values:<47}"
gdb.write(hex_display)
ascii_values = "".join(chr(byte) if 32 <= byte <= 126 else "." for byte in chunk)
gdb.write(f" {ascii_values} \n")
if is_target_arch("arm"):
xpsr = int(frame.read_register('xpsr'))
return xpsr & 0xf
else:
# TODO: figure out a more proper way to detect if
# we are in an interrupt context
g_current_regs = gdb_eval_or_none("g_current_regs")
return not g_current_regs[cpuid]
def is_decimal(s):
return re.fullmatch(r"\d+", s) is not None
def get_arch_sp_name():
    """Return the stack pointer register name for the target architecture.

    Raises gdb.GdbError when the architecture is none of the ones listed.
    """
    # Checked in order, each as an exact architecture-name match.
    candidates = (
        ("arm", "sp"),
        ("i386", "esp"),
        ("i386:x86-64", "rsp"),
    )
    for arch, regname in candidates:
        if is_target_arch(arch, exact=True):
            return regname
    raise gdb.GdbError("Not implemented yet")
def is_hexadecimal(s):
return re.fullmatch(r"0[xX][0-9a-fA-F]+|[0-9a-fA-F]+", s) is not None
class Hexdump(gdb.Command):
"""hexdump address/symbol <size>"""
def get_arch_pc_name():
    """Return the program counter register name for the target architecture.

    Raises gdb.GdbError when the architecture is none of the ones listed.
    """
    # Checked in order, each as an exact architecture-name match.
    candidates = (
        ("arm", "pc"),
        ("i386", "eip"),
        ("i386:x86-64", "rip"),
    )
    for arch, regname in candidates:
        if is_target_arch(arch, exact=True):
            return regname
    raise gdb.GdbError("Not implemented yet")
def __init__(self):
super(Hexdump, self).__init__("hexdump", gdb.COMMAND_USER)
def invoke(self, args, from_tty):
argv = args.split(" ")
argc = len(argv)
address = 0
size = 0
if (argv[0] == ""):
gdb.write("Usage: hexdump address/symbol <size>\n")
return
if is_decimal(argv[0]) or is_hexadecimal(argv[0]):
address = int(argv[0], 0)
size = int(argv[1], 0)
else:
var = gdb.parse_and_eval(f'{argv[0]}')
address = int(var.address)
size = int(var.type.sizeof)
gdb.write(f"{argv[0]} {hex(address)} {int(size)}\n")
def get_register_byname(regname, tcb=None):
frame = gdb.selected_frame()
hexdump(address, size)
# If no tcb is given then we can directly used the register from
# the cached frame by GDB
if not tcb:
return int(frame.read_register(regname))
Hexdump()
# Ok, let's take it from the context in the given tcb
arch = frame.architecture()
tcbinfo = gdb.parse_and_eval("g_tcbinfo")
i = 0
for reg in arch.registers():
if reg.name == regname:
break
i += 1
regs = tcb["xcp"]["regs"].cast(gdb.lookup_type("char").pointer())
value = gdb.Value(regs + tcbinfo["reg_off"]["p"][i]).cast(
gdb.lookup_type("uintptr_t").pointer()
)[0]
return int(value)
def get_tcbs():
    """Return the truthy entries of g_pidhash as a list.

    Tasks may be created or deleted at runtime, so both symbols are
    evaluated afresh on every call instead of being cached.
    """
    pidhash = gdb.parse_and_eval("g_pidhash")
    npidhash = gdb.parse_and_eval("g_npidhash")

    tcbs = []
    for i in range(0, npidhash):
        entry = pidhash[i]
        if entry:
            tcbs.append(entry)
    return tcbs