qxy11 7fb620a5cc
Fix a deadlock in ModuleList when starting a standalone lldb client/server (#148774)
Summary:
There was a deadlock was introduced by [PR
#146441](https://github.com/llvm/llvm-project/pull/146441) which changed
`CurrentThreadIsPrivateStateThread()` to
`CurrentThreadPosesAsPrivateStateThread()`. This change caused the
execution path in
[`ExecutionContextRef::SetTargetPtr()`](10b5558b61/lldb/source/Target/ExecutionContext.cpp (L513))
to now enter a code block that was previously skipped, triggering
[`GetSelectedFrame()`](10b5558b61/lldb/source/Target/ExecutionContext.cpp (L522))
which leads to a deadlock.

Thread 1 gets m_modules_mutex in
[`ModuleList::AppendImpl`](96148f9214/lldb/source/Core/ModuleList.cpp (L218)),
Thread 3 gets m_language_runtimes_mutex in
[`GetLanguageRuntime`](96148f9214/lldb/source/Target/Process.cpp (L1501)),
but then Thread 1 waits for m_language_runtimes_mutex in
[`GetLanguageRuntime`](96148f9214/lldb/source/Target/Process.cpp (L1501))
while Thread 3 waits for m_modules_mutex in
[`ScanForGNUstepObjCLibraryCandidate`](96148f9214/lldb/source/Plugins/LanguageRuntime/ObjC/GNUstepObjCRuntime/GNUstepObjCRuntime.cpp (L57)).

This fixes the deadlock by adding a scoped block around the mutex lock
before the call to the notifier, and moved the notifier call outside of
the mutex-guarded section. The notifier call
[`NotifyModuleAdded`](96148f9214/lldb/source/Target/Target.cpp (L1810))
should be thread-safe, since the module should be added to the
`ModuleList` before the mutex is released, and the notifier doesn't
modify the module list further, and the call is operates on local state
and the `Target` instance.

### Deadlocked Thread backtraces:
```
* thread #3, name = 'dbg.evt-handler', stop reason = signal SIGSTOP
  * frame #0: 0x00007f2f1e2973dc libc.so.6`futex_wait(private=0, expected=2, futex_word=0x0000563786bd5f40) at    futex-internal.h:146:13
   /*... a bunch of mutex related bt ... */    
   liblldb.so.21.0git`std::lock_guard<std::recursive_mutex>::lock_guard(this=0x00007f2f0f1927b0, __m=0x0000563786bd5f40) at std_mutex.h:229:19
    frame #8: 0x00007f2f27946eb7 liblldb.so.21.0git`ScanForGNUstepObjCLibraryCandidate(modules=0x0000563786bd5f28, TT=0x0000563786bd5eb8) at GNUstepObjCRuntime.cpp:60:41
    frame #9: 0x00007f2f27946c80 liblldb.so.21.0git`lldb_private::GNUstepObjCRuntime::CreateInstance(process=0x0000563785e1d360, language=eLanguageTypeObjC) at GNUstepObjCRuntime.cpp:87:8
    frame #10: 0x00007f2f2746fca5 liblldb.so.21.0git`lldb_private::LanguageRuntime::FindPlugin(process=0x0000563785e1d360, language=eLanguageTypeObjC) at LanguageRuntime.cpp:210:36
    frame #11: 0x00007f2f2742c9e3 liblldb.so.21.0git`lldb_private::Process::GetLanguageRuntime(this=0x0000563785e1d360, language=eLanguageTypeObjC) at Process.cpp:1516:9
    ...
    frame #21: 0x00007f2f2750b5cc liblldb.so.21.0git`lldb_private::Thread::GetSelectedFrame(this=0x0000563785e064d0, select_most_relevant=DoNoSelectMostRelevantFrame) at Thread.cpp:274:48
    frame #22: 0x00007f2f273f9957 liblldb.so.21.0git`lldb_private::ExecutionContextRef::SetTargetPtr(this=0x00007f2f0f193778, target=0x0000563786bd5be0, adopt_selected=true) at ExecutionContext.cpp:525:32
    frame #23: 0x00007f2f273f9714 liblldb.so.21.0git`lldb_private::ExecutionContextRef::ExecutionContextRef(this=0x00007f2f0f193778, target=0x0000563786bd5be0, adopt_selected=true) at ExecutionContext.cpp:413:3
    frame #24: 0x00007f2f270e80af liblldb.so.21.0git`lldb_private::Debugger::GetSelectedExecutionContext(this=0x0000563785d83bc0) at Debugger.cpp:1225:23
    frame #25: 0x00007f2f271bb7fd liblldb.so.21.0git`lldb_private::Statusline::Redraw(this=0x0000563785d83f30, update=true) at Statusline.cpp:136:41
    ...
* thread #1, name = 'lldb', stop reason = signal SIGSTOP
  * frame #0: 0x00007f2f1e2973dc libc.so.6`futex_wait(private=0, expected=2, futex_word=0x0000563785e1dd98) at futex-internal.h:146:13
   /*... a bunch of mutex related bt ... */    
   liblldb.so.21.0git`std::lock_guard<std::recursive_mutex>::lock_guard(this=0x00007ffe62be0488, __m=0x0000563785e1dd98) at std_mutex.h:229:19
    frame #8: 0x00007f2f2742c8d1 liblldb.so.21.0git`lldb_private::Process::GetLanguageRuntime(this=0x0000563785e1d360, language=eLanguageTypeC_plus_plus) at Process.cpp:1510:41
    frame #9: 0x00007f2f2743c46f liblldb.so.21.0git`lldb_private::Process::ModulesDidLoad(this=0x0000563785e1d360, module_list=0x00007ffe62be06a0) at Process.cpp:6082:36
    ...
    frame #13: 0x00007f2f2715cf03 liblldb.so.21.0git`lldb_private::ModuleList::AppendImpl(this=0x0000563786bd5f28, module_sp=ptr = 0x563785cec560, use_notifier=true) at ModuleList.cpp:246:19
    frame #14: 0x00007f2f2715cf4c liblldb.so.21.0git`lldb_private::ModuleList::Append(this=0x0000563786bd5f28, module_sp=ptr = 0x563785cec560, notify=true) at ModuleList.cpp:251:3
    ...
    frame #19: 0x00007f2f274349b3 liblldb.so.21.0git`lldb_private::Process::ConnectRemote(this=0x0000563785e1d360, remote_url=(Data = "connect://localhost:1234", Length = 24)) at Process.cpp:3250:9
    frame #20: 0x00007f2f27411e0e liblldb.so.21.0git`lldb_private::Platform::DoConnectProcess(this=0x0000563785c59990, connect_url=(Data = "connect://localhost:1234", Length = 24), plugin_name=(Data = "gdb-remote", Length = 10), debugger=0x0000563785d83bc0, stream=0x00007ffe62be3128, target=0x0000563786bd5be0, error=0x00007ffe62be1ca0) at Platform.cpp:1926:23
```

## Test Plan:
Built a hello world a.out
Run server in one terminal:
```
~/llvm/build/Debug/bin/lldb-server g :1234 a.out
```
Run client in another terminal
```
~/llvm/build/Debug/bin/lldb -o "gdb-remote 1234" -o "b hello.cc:3"
```

Before:
Client hangs indefinitely
```
~/llvm/build/Debug/bin/lldb -o "gdb-remote 1234" -o "b main"
(lldb) gdb-remote 1234

^C^C
```

After:
```
~/llvm/build/Debug/bin/lldb -o "gdb-remote 1234" -o "b hello.cc:3"
(lldb) gdb-remote 1234
Process 837068 stopped
* thread #1, name = 'a.out', stop reason = signal SIGSTOP
    frame #0: 0x00007ffff7fe4a60
ld-linux-x86-64.so.2`_start:
->  0x7ffff7fe4a60 <+0>: movq   %rsp, %rdi
    0x7ffff7fe4a63 <+3>: callq  0x7ffff7fe5780 ; _dl_start at rtld.c:522:1

ld-linux-x86-64.so.2`_dl_start_user:
    0x7ffff7fe4a68 <+0>: movq   %rax, %r12
    0x7ffff7fe4a6b <+3>: movl   0x18067(%rip), %eax ; _dl_skip_args
(lldb) b hello.cc:3
Breakpoint 1: where = a.out`main + 15 at hello.cc:4:13, address = 0x00005555555551bf
(lldb) c
Process 837068 resuming
Process 837068 stopped
* thread #1, name = 'a.out', stop reason = breakpoint 1.1
    frame #0: 0x00005555555551bf a.out`main at hello.cc:4:13
   1   	#include <iostream>
   2
   3   	int main() {
-> 4   	  std::cout << "Hello World" << std::endl;
   5   	  return 0;
   6   	}
```
2025-08-04 08:43:49 -07:00

202 lines
7.2 KiB
Python

import lldb
import os
import re
import socket
import time
from contextlib import closing
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
from lldbsuite.test.lldbpexpect import PExpectTest
from lldbgdbserverutils import get_lldb_server_exe
# PExpect uses many timeouts internally and doesn't play well
# under ASAN on a loaded machine..
@skipIfAsan
class TestStatusline(PExpectTest):
# Change this value to something smaller to make debugging this test less
# tedious.
TIMEOUT = 60
TERMINAL_HEIGHT = 10
TERMINAL_WIDTH = 60
def do_setup(self):
# Create a target and run to a breakpoint.
exe = self.getBuildArtifact("a.out")
self.expect(
"target create {}".format(exe), substrs=["Current executable set to"]
)
self.expect('breakpoint set -p "Break here"', substrs=["Breakpoint 1"])
self.expect("run", substrs=["stop reason"])
self.resize()
def resize(self, height=None, width=None):
height = self.TERMINAL_HEIGHT if not height else height
width = self.TERMINAL_WIDTH if not width else width
# Change the terminal dimensions. When we launch the tests, we reset
# all the settings, leaving the terminal dimensions unset.
self.child.setwinsize(height, width)
def test(self):
"""Basic test for the statusline."""
self.build()
self.launch(timeout=self.TIMEOUT)
self.do_setup()
# Enable the statusline and check for the control character and that we
# can see the target, the location and the stop reason.
self.expect('set set separator "| "')
self.expect(
"set set show-statusline true",
[
"\x1b[1;{}r".format(self.TERMINAL_HEIGHT - 1),
"a.out | main.c:2:11 | breakpoint 1.1 ",
],
)
# Change the terminal dimensions and make sure it's reflected immediately.
self.child.setwinsize(self.TERMINAL_HEIGHT, 25)
self.child.expect(re.escape("a.out | main.c:2:11 | bre"))
self.child.setwinsize(self.TERMINAL_HEIGHT, self.TERMINAL_WIDTH)
# Change the separator.
self.expect('set set separator "S "', ["a.out S main.c:2:11"])
# Change the format.
self.expect(
'set set statusline-format "target = {${target.file.basename}} ${separator}"',
["target = a.out S"],
)
self.expect('set set separator "| "')
# Hide the statusline and check or the control character.
self.expect("set set show-statusline false", ["\x1b[1;0r"])
def test_no_color(self):
"""Basic test for the statusline with colors disabled."""
self.build()
self.launch(use_colors=False, timeout=self.TIMEOUT)
self.do_setup()
# Enable the statusline and check for the "reverse video" control character.
self.expect(
"set set show-statusline true",
[
"\x1b[7m",
],
)
def test_deadlock(self):
"""Regression test for lock inversion between the statusline mutex and
the output mutex."""
self.build()
self.launch(
extra_args=["-o", "settings set use-color false"], timeout=self.TIMEOUT
)
self.child.expect("(lldb)")
self.resize()
exe = self.getBuildArtifact("a.out")
self.expect("file {}".format(exe), ["Current executable"])
self.expect("help", ["Debugger commands"])
def test_no_target(self):
"""Test that we print "no target" when launched without a target."""
self.launch(timeout=self.TIMEOUT)
self.resize()
self.expect("set set show-statusline true", ["no target"])
@skipIfEditlineSupportMissing
def test_resize(self):
"""Test that move the cursor when resizing."""
self.launch(timeout=self.TIMEOUT)
self.resize()
self.expect("set set show-statusline true", ["no target"])
self.resize(20, 60)
# Check for the escape code to resize the scroll window.
self.child.expect(re.escape("\x1b[1;19r"))
self.child.expect("(lldb)")
@skipIfRemote
@skipIfWindows
@skipIfDarwin
@add_test_categories(["lldb-server"])
def test_modulelist_deadlock(self):
"""Regression test for a deadlock that occurs when the status line is enabled before connecting
to a gdb-remote server.
"""
if get_lldb_server_exe() is None:
self.skipTest("lldb-server not found")
MAX_RETRY_ATTEMPTS = 10
DELAY = 0.25
def _find_free_port(host):
for attempt in range(MAX_RETRY_ATTEMPTS):
try:
family, type, proto, _, _ = socket.getaddrinfo(
host, 0, proto=socket.IPPROTO_TCP
)[0]
with closing(socket.socket(family, type, proto)) as sock:
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind((host, 0))
return sock.getsockname()
except OSError:
time.sleep(DELAY * 2**attempt) # Exponential backoff
raise RuntimeError(
"Could not find a free port on {} after {} attempts.".format(
host, MAX_RETRY_ATTEMPTS
)
)
def _wait_for_server_ready_in_log(log_file_path, ready_message):
"""Check log file for server ready message with retry logic"""
for attempt in range(MAX_RETRY_ATTEMPTS):
if os.path.exists(log_file_path):
with open(log_file_path, "r") as f:
if ready_message in f.read():
return
time.sleep(pow(2, attempt) * DELAY)
raise RuntimeError(
"Server not ready after {} attempts.".format(MAX_RETRY_ATTEMPTS)
)
self.build()
exe_path = self.getBuildArtifact("a.out")
server_log_file = self.getLogBasenameForCurrentTest() + "-lldbserver.log"
self.addTearDownHook(
lambda: os.path.exists(server_log_file) and os.remove(server_log_file)
)
# Find a free port for the server
addr = _find_free_port("localhost")
connect_address = "[{}]:{}".format(*addr)
commandline_args = [
"gdbserver",
connect_address,
exe_path,
"--log-file={}".format(server_log_file),
"--log-channels=lldb conn",
]
server_proc = self.spawnSubprocess(
get_lldb_server_exe(), commandline_args, install_remote=False
)
self.assertIsNotNone(server_proc)
# Wait for server to be ready by checking log file.
server_ready_message = "Listen to {}".format(connect_address)
_wait_for_server_ready_in_log(server_log_file, server_ready_message)
# Launch LLDB client and connect to lldb-server with statusline enabled
self.launch(timeout=self.TIMEOUT)
self.resize()
self.expect("settings set show-statusline true", ["no target"])
self.expect(
f"gdb-remote {connect_address}", [b"a.out \xe2\x94\x82 signal SIGSTOP"]
)