Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ RUN apt-get update \
python3.13-dev \
python3.13-dbg \
python3.13-venv \
python3.14-dev \
python3.14-dbg \
python3.14-venv \
make \
cmake \
gdb \
Expand Down
5 changes: 5 additions & 0 deletions news/279.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fix an issue where the PID offset could not be determined when multiple
subinterpreters were present. Previously, pystack only checked the first
interpreter in the linked list, which was not guaranteed to be the main
interpreter. The fix now iterates over all interpreters and correctly locates
the TID needed to determine the offset.
3 changes: 3 additions & 0 deletions news/279.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add support for subinterpreters when reporting pure Python stacks. Threads
running in subinterpreters are now identified and grouped by interpreter ID.
Native stack reporting for subinterpreters is not yet supported.
Comment on lines +1 to +3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Add support for subinterpreters when reporting pure Python stacks. Threads
running in subinterpreters are now identified and grouped by interpreter ID.
Native stack reporting for subinterpreters is not yet supported.
PyStack now supports reporting stacks for processes using subinterpreters.

1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
"src/pystack/_pystack.pyx",
"src/pystack/_pystack/corefile.cpp",
"src/pystack/_pystack/elf_common.cpp",
"src/pystack/_pystack/interpreter.cpp",
"src/pystack/_pystack/logging.cpp",
"src/pystack/_pystack/mem.cpp",
"src/pystack/_pystack/process.cpp",
Expand Down
2 changes: 0 additions & 2 deletions src/pystack/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from ._version import __version__
from .traceback_formatter import print_thread

__all__ = [
"__version__",
"print_thread",
]
10 changes: 7 additions & 3 deletions src/pystack/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
from pystack.process import is_gzip

from . import errors
from . import print_thread
from .colors import colored
from .engine import CoreFileAnalyzer
from .engine import NativeReportingMode
from .engine import StackMethod
from .engine import get_process_threads
from .engine import get_process_threads_for_core
from .traceback_formatter import TracebackPrinter

PERMISSION_ERROR_MSG = "Operation not permitted"
NO_SUCH_PROCESS_ERROR_MSG = "No such process"
Expand Down Expand Up @@ -285,14 +285,17 @@ def process_remote(parser: argparse.ArgumentParser, args: argparse.Namespace) ->
if not args.block and args.native_mode != NativeReportingMode.OFF:
parser.error("Native traces are only available in blocking mode")

printer = TracebackPrinter(
native_mode=args.native_mode, include_subinterpreters=True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of hardcoding include_subinterpreters to True, I think we should do it conditionally based on whether or not there actually are multiple interpreters. I don't like having the extra indenting and the extra Interpreter-0 (main) in the overwhelmingly common case where there's only a single interpreter in the process. That'll be the case at least 99.9% of the time and maybe more.

)
for thread in get_process_threads(
args.pid,
stop_process=args.block,
native_mode=args.native_mode,
locals=args.locals,
method=StackMethod.ALL if args.exhaustive else StackMethod.AUTO,
):
print_thread(thread, args.native_mode)
printer.print_thread(thread)


def format_psinfo_information(psinfo: Dict[str, Any]) -> str:
Expand Down Expand Up @@ -414,6 +417,7 @@ def process_core(parser: argparse.ArgumentParser, args: argparse.Namespace) -> N
elf_id if elf_id else "<MISSING>",
)

printer = TracebackPrinter(args.native_mode, include_subinterpreters=True)
for thread in get_process_threads_for_core(
corefile,
executable,
Expand All @@ -422,7 +426,7 @@ def process_core(parser: argparse.ArgumentParser, args: argparse.Namespace) -> N
locals=args.locals,
method=StackMethod.ALL if args.exhaustive else StackMethod.AUTO,
):
print_thread(thread, args.native_mode)
printer.print_thread(thread)


if __name__ == "__main__": # pragma: no cover
Expand Down
117 changes: 111 additions & 6 deletions src/pystack/_pystack.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ from _pystack.elf_common cimport CoreFileAnalyzer as NativeCoreFileAnalyzer
from _pystack.elf_common cimport ProcessAnalyzer as NativeProcessAnalyzer
from _pystack.elf_common cimport SectionInfo
from _pystack.elf_common cimport getSectionInfo
from _pystack.interpreter cimport InterpreterUtils
from _pystack.logging cimport initializePythonLoggerInterface
from _pystack.mem cimport AbstractRemoteMemoryManager
from _pystack.mem cimport MemoryMapInformation as CppMemoryMapInformation
Expand Down Expand Up @@ -65,6 +66,7 @@ from .types import NativeFrame
from .types import PyCodeObject
from .types import PyFrame
from .types import PyThread
from .types import frame_type

LOGGER = logging.getLogger(__file__)

Expand Down Expand Up @@ -462,6 +464,7 @@ cdef object _construct_threads_from_interpreter_state(
bint add_native_traces,
bint resolve_locals,
):
interpreter_id = InterpreterUtils.getInterpreterId(manager, head)
LOGGER.info("Fetching Python threads")
threads = []

Expand All @@ -486,7 +489,9 @@ cdef object _construct_threads_from_interpreter_state(
current_thread.isGilHolder(),
current_thread.isGCCollecting(),
python_version,
interpreter_id,
name=get_thread_name(pid, current_thread.Tid()),
stack_anchor=current_thread.StackAnchor(),
)
)
current_thread = (
Expand All @@ -495,6 +500,91 @@ cdef object _construct_threads_from_interpreter_state(

return threads


def _entry_frame_count(thread: PyThread) -> int:
    """Return how many of *thread*'s Python frames are entry frames."""
    count = 0
    for candidate_frame in thread.all_frames:
        if candidate_frame.is_entry:
            count += 1
    return count


def _eval_frame_positions(thread: PyThread):
    """Return indices of the native frames classified as eval-loop frames.

    Returns an empty list when the thread has no detected Python version,
    since ``frame_type`` needs the version to classify frames.

    NOTE(review): this helper appears to be unused in this module — confirm
    and consider removing it.
    """
    version = thread.python_version
    if not version:
        return []
    positions = []
    for frame_index, native_frame in enumerate(thread.native_frames):
        if frame_type(native_frame, version) == NativeFrame.FrameType.EVAL:
            positions.append(frame_index)
    return positions


Comment on lines +508 to +517
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused

Suggested change
def _eval_frame_positions(thread: PyThread):
if not thread.python_version:
return []
return [
index
for index, native_frame in enumerate(thread.native_frames)
if frame_type(native_frame, thread.python_version) == NativeFrame.FrameType.EVAL
]

def _slice_native_stacks_for_same_tid_threads(threads) -> None:
    """Split one shared native stack among Python thread states with one tid.

    When several interpreters each have a thread state for the same OS
    thread, every ``PyThread`` in *threads* carries a copy of the same full
    native stack. This mutates each thread's ``native_frames`` in place so
    that it keeps only the slice of native frames corresponding to its own
    Python (entry) frames. If the frames cannot be paired up reliably, the
    group is left untouched.
    """
    # The caller already skips singleton groups, but keep the guard so the
    # helper is also safe to call directly.
    if len(threads) < 2:
        return

    # All threads in the group share the same native stack, so any thread
    # with a non-empty native_frames list can serve as the canonical copy.
    canonical = next((thread for thread in threads if thread.native_frames), None)
    if canonical is None:
        return

    canonical_frames = list(canonical.native_frames)
    # Positions (indices into the native stack) of the eval-loop frames.
    eval_positions = [
        index
        for index, native_frame in enumerate(canonical_frames)
        if frame_type(native_frame, canonical.python_version) == NativeFrame.FrameType.EVAL
    ]
    if not eval_positions:
        return

    # Each Python entry frame must pair with exactly one native eval frame.
    # If the totals disagree we cannot attribute frames safely, so bail out.
    entry_counts = [_entry_frame_count(thread) for thread in threads]
    if sum(entry_counts) != len(eval_positions):
        LOGGER.debug(
            "Skipping same-tid native slicing for tid %s due to mismatched counts: "
            "entry=%s eval=%s",
            threads[0].tid,
            sum(entry_counts),
            len(eval_positions),
        )
        return

    # Order the group deterministically:
    #   1. threads that have a stack anchor before those without one,
    #   2. then by stack anchor value, descending,
    #   3. then by original index in the input list, ascending (tie-break).
    # NOTE(review): presumably a higher stack anchor means the thread's
    # frames sit deeper in the shared native stack — confirm.
    ordered_threads = sorted(
        enumerate(threads),
        key=lambda item: (
            item[1].stack_anchor is None,
            -(item[1].stack_anchor or 0),
            item[0],
        ),
    )

    cursor = 0
    for _, thread in ordered_threads:
        required_eval_frames = _entry_frame_count(thread)
        if required_eval_frames == 0:
            # A thread state with no Python frames owns no part of the
            # native stack (e.g. a thread state that has not started
            # running Python code yet).
            thread.native_frames = []
            continue

        group_start = cursor
        group_end = cursor + required_eval_frames
        # Slice from just past the previous group's last eval frame up to
        # (but not including) the next group's first eval frame.
        prev_eval = eval_positions[group_start - 1] if group_start > 0 else -1
        next_eval = (
            eval_positions[group_end]
            if group_end < len(eval_positions)
            else len(canonical_frames)
        )
        thread.native_frames = canonical_frames[prev_eval + 1 : next_eval]
        cursor = group_end


def _normalize_python_threads(threads, native_mode: NativeReportingMode):
    """Slice shared native stacks for threads that report the same OS tid.

    With native reporting off there is nothing to adjust, so *threads* is
    returned unchanged. Otherwise threads are grouped by tid and each group
    with more than one member is handed to
    ``_slice_native_stacks_for_same_tid_threads``, which mutates the
    threads' native frames in place. The original list is always returned.
    """
    if native_mode == NativeReportingMode.OFF:
        return threads

    grouped_by_tid = {}
    for candidate in threads:
        grouped_by_tid.setdefault(candidate.tid, []).append(candidate)

    for same_tid_group in grouped_by_tid.values():
        if len(same_tid_group) > 1:
            _slice_native_stacks_for_same_tid_threads(same_tid_group)
    return threads

cdef object _construct_os_thread(
shared_ptr[AbstractProcessManager] manager, int pid, int tid
):
Expand Down Expand Up @@ -622,7 +712,8 @@ def _get_process_threads(
)

all_tids = list(manager.get().Tids())
if head:
threads = []
while head:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a minor nit, but - this winds up printing the subinterpreters from the most recently spawned to the least, printing the main interpreter last. I wonder if it would be more useful to reverse the list of interpreters, so that the main interpreter's threads are printed first...

What do you think, @pablogsal ?

add_native_traces = native_mode != NativeReportingMode.OFF
for thread in _construct_threads_from_interpreter_state(
manager,
Expand All @@ -634,7 +725,11 @@ def _get_process_threads(
):
if thread.tid in all_tids:
all_tids.remove(thread.tid)
yield thread
threads.append(thread)
head = InterpreterUtils.getNextInterpreter(manager, head)

for thread in _normalize_python_threads(threads, native_mode):
yield thread
Comment on lines +731 to +732
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there some reason not to use yield from here?


if native_mode == NativeReportingMode.ALL:
yield from _construct_os_threads(manager, pid, all_tids)
Expand Down Expand Up @@ -768,15 +863,25 @@ def _get_process_threads_for_core(
)

all_tids = list(manager.get().Tids())
threads = []

if head:
native = native_mode in {NativeReportingMode.PYTHON, NativeReportingMode.ALL}
while head:
add_native_traces = native_mode != NativeReportingMode.OFF
for thread in _construct_threads_from_interpreter_state(
manager, head, pymanager.pid, pymanager.python_version, native, locals
manager,
head,
pymanager.pid,
pymanager.python_version,
add_native_traces,
locals,
):
if thread.tid in all_tids:
all_tids.remove(thread.tid)
yield thread
threads.append(thread)
head = InterpreterUtils.getNextInterpreter(manager, head)

for thread in _normalize_python_threads(threads, native_mode):
yield thread
Comment on lines +883 to +884
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there some reason not to use yield from here?


if native_mode == NativeReportingMode.ALL:
yield from _construct_os_threads(manager, pymanager.pid, all_tids)
3 changes: 2 additions & 1 deletion src/pystack/_pystack/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_library(_pystack STATIC
pythread.cpp
version.cpp
elf_common.cpp
pytypes.cpp)
pytypes.cpp
interpreter.cpp)
set_property(TARGET _pystack PROPERTY POSITION_INDEPENDENT_CODE ON)
include_directories("." "cpython" ${PYTHON_INCLUDE_DIRS})
4 changes: 2 additions & 2 deletions src/pystack/_pystack/cpython/interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,10 @@ struct _gil_runtime_state
int locked;
unsigned long switch_number;
pthread_cond_t cond;
pthread_cond_t mutex;
pthread_mutex_t mutex;
#ifdef FORCE_SWITCHING
pthread_cond_t switch_cond;
pthread_cond_t switch_mutex;
pthread_mutex_t switch_mutex;
#endif
};

Expand Down
36 changes: 36 additions & 0 deletions src/pystack/_pystack/interpreter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include <memory>

#include "interpreter.h"
#include "logging.h"
#include "process.h"
#include "structure.h"
#include "version.h"

namespace pystack {

// Return the remote address stored in the "next" field of the interpreter
// state located at `interpreter_addr`, i.e. the following entry in the
// interpreter linked list. Callers treat a zero address as end of list.
remote_addr_t
InterpreterUtils::getNextInterpreter(
        const std::shared_ptr<const AbstractProcessManager>& manager,
        remote_addr_t interpreter_addr)
{
    // Map the remote PyInterpreterState onto the version-specific layout
    // and read its o_next pointer.
    Structure<py_is_v> is(manager, interpreter_addr);
    return is.getField(&py_is_v::o_next);
}

// Return the numeric ID of the interpreter state at `interpreter_addr`.
// For interpreters older than 3.7 the field does not exist, so 0 (the main
// interpreter's ID) is reported for every interpreter.
int64_t
InterpreterUtils::getInterpreterId(
        const std::shared_ptr<const AbstractProcessManager>& manager,
        remote_addr_t interpreter_addr)
{
    if (!manager->versionIsAtLeast(3, 7)) {
        // No support for subinterpreters so the only interpreter is ID 0.
        // NOTE(review): multiple interpreters are technically possible even
        // pre-3.7; falling back to a synthetic ID (e.g. the interpreter
        // address) may be preferable if anyone hits this in practice.
        return 0;
    }

    // Read the o_id field from the version-specific interpreter layout.
    Structure<py_is_v> is(manager, interpreter_addr);
    int64_t id_value = is.getField(&py_is_v::o_id);

    return id_value;
}

} // namespace pystack
24 changes: 24 additions & 0 deletions src/pystack/_pystack/interpreter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#pragma once

#include <cstdint>
#include <memory>

#include "mem.h"
#include "process.h"

namespace pystack {

// Helpers for walking the PyInterpreterState linked list of a remote
// process (or core file) through an AbstractProcessManager.
class InterpreterUtils
{
  public:
    // Static Methods

    // Read the "next" pointer of the interpreter state at interpreter_addr,
    // i.e. the remote address of the next interpreter in the list.
    static remote_addr_t getNextInterpreter(
            const std::shared_ptr<const AbstractProcessManager>& manager,
            remote_addr_t interpreter_addr);

    // Read the interpreter's numeric ID. Returns 0 for Python < 3.7, which
    // has no per-interpreter ID field.
    static int64_t getInterpreterId(
            const std::shared_ptr<const AbstractProcessManager>& manager,
            remote_addr_t interpreter_addr);
};

} // namespace pystack
13 changes: 13 additions & 0 deletions src/pystack/_pystack/interpreter.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from _pystack.mem cimport remote_addr_t
from _pystack.process cimport AbstractProcessManager
from libc.stdint cimport int64_t
from libcpp.memory cimport shared_ptr


# Cython declarations for the C++ interpreter-walking helpers.
cdef extern from "interpreter.h" namespace "pystack":
    cdef cppclass InterpreterUtils:
        @staticmethod
        remote_addr_t getNextInterpreter(shared_ptr[AbstractProcessManager] manager, remote_addr_t interpreter_addr) except +

        @staticmethod
        int64_t getInterpreterId(shared_ptr[AbstractProcessManager] manager, remote_addr_t interpreter_addr) except +
1 change: 1 addition & 0 deletions src/pystack/_pystack/process.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,7 @@ AbstractProcessManager::copyDebugOffsets(Structure<py_runtime_v>& py_runtime, py
set_offset(py_is.o_sysdict, &py_runtime_v::o_dbg_off_interpreter_state_sysdict);
set_offset(py_is.o_builtins, &py_runtime_v::o_dbg_off_interpreter_state_builtins);
set_offset(py_is.o_gil_runtime_state, &py_runtime_v::o_dbg_off_interpreter_state_ceval_gil);
set_offset(py_is.o_id, &py_runtime_v::o_dbg_off_interpreter_state_id);

set_size(py_thread, &py_runtime_v::o_dbg_off_thread_state_struct_size);
set_offset(py_thread.o_prev, &py_runtime_v::o_dbg_off_thread_state_prev);
Expand Down
Loading
Loading