Recently we ran into the problem that we wanted a stack trace (aka stack backtrace) of a running MPI application. One possibility would be attaching gdb to the process to generate one. Another option is the pstack command, but for that you must allow applications to read other applications' memory.
And, at least as a computer scientist, you always have the option of doing it yourself. Based on some articles (pthread-overload.c, iX, and Linux Magazin), a library was created which prints a backtrace when SIGUSR1, SIGSEGV, etc. is received.
Therefore a shared library is needed which can be loaded during process startup by listing it in the LD_PRELOAD environment variable. Inside the library a function, attributed with the constructor attribute, must install a signal handler for signals that should generate a backtrace. The backtrace itself can be obtained by using the backtrace and backtrace_symbols functions of the glibc.
Essentially this is what PTrace.cpp does. Just compile it with make or make TARGET=debug, generating ptrace.so and ptrace-dbg.so, respectively.
After some digging around on the net I found that /lib/libSegFault.so does the same. So
$ env LD_PRELOAD=/lib/libSegFault.so app-which-creates-segfault
prints an even nicer backtrace than the solution discussed here.
Usage sample 1:
$ env LD_PRELOAD=./ptrace-dbg.so ./crash
gives
[ptrace] DEBUG: initializing
[ptrace] DEBUG: overriding: SIGUSR1 (10) SIGINT (2) SIGQUIT (3) SIGILL (4) SIGFPE (8) SIGSEGV (11) SIGBUS (7)
[ptrace] DEBUG: aborting: SIGINT (2) SIGQUIT (3) SIGILL (4) SIGFPE (8) SIGSEGV (11) SIGBUS (7)
[ptrace] DEBUG: initializing done
-> function main
-> function first
-> function second
[ptrace] Received signal SIGSEGV (11).
[ptrace] 0: /xxxxxxxxxxxxxxx/ptrace/ptrace-dbg.so(PtraceSignalHandler+0x66) [0x2b22126f9c26]
[ptrace] 1: /lib64/libc.so.6 [0x2b22130cb2d0]
[ptrace] 2: ./crash(_Z6secondv+0x23) [0x40081b]
[ptrace] 3: ./crash(_Z5firstv+0x18) [0x40083c]
[ptrace] 4: ./crash(main+0x23) [0x400861]
[ptrace] 5: /lib64/libc.so.6(__libc_start_main+0xf4) [0x2b22130b8994]
[ptrace] 6: ./crash(__gxx_personality_v0+0x41) [0x400749]
[ptrace] DEBUG: Calling exit() after handled signal SIGSEGV (11).
[ptrace] DEBUG: finalizing
[ptrace] DEBUG: finalizing done
Usage sample 2:
$ env LD_PRELOAD=./ptrace-dbg.so sleep 30
$ kill -USR1 <pid of started sleep>
gives
[ptrace] DEBUG: initializing
[ptrace] DEBUG: overriding: SIGUSR1 (10) SIGINT (2) SIGQUIT (3) SIGILL (4) SIGFPE (8) SIGSEGV (11) SIGBUS (7)
[ptrace] DEBUG: aborting: SIGINT (2) SIGQUIT (3) SIGILL (4) SIGFPE (8) SIGSEGV (11) SIGBUS (7)
[ptrace] DEBUG: initializing done
[ptrace] Received signal SIGUSR1 (10).
[ptrace] 0: /xxxxxxxxxxxxxxx/ptrace-dbg.so(PtraceSignalHandler+0x66) [0x2ab7cee65c26]
[ptrace] 1: /lib64/libc.so.6 [0x2ab7cf0a52d0]
[ptrace] 2: /lib64/libc.so.6(nanosleep+0x10) [0x2ab7cf10f3c0]
[ptrace] 3: sleep [0x4029c4]
[ptrace] 4: sleep [0x40142c]
[ptrace] 5: /lib64/libc.so.6(__libc_start_main+0xf4) [0x2ab7cf092994]
[ptrace] 6: sleep [0x401079]
[ptrace] DEBUG: finalizing
[ptrace] DEBUG: finalizing done
PTrace.cpp
/* ===========================================================================
*
* Filename: PTrace.cpp
*
* Description: Shared library printing backtrace (stack trace)
* when signal is caught.
*
* Version: 0.1
* Created: 2011-02-08
*
* Author: Markus Wittmann (mw), markus.wittmann@rrze.uni-erlangen.de
* Company: RRZE Erlangen
* Project: ptrace
* Copyright: Copyright (c) 2011, Markus Wittmann
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, v2, as
* published by the Free Software Foundation
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* ===========================================================================
*/
#include <errno.h>
#include <execinfo.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Logging helpers. Every message goes to stderr and is prefixed with
// "[ptrace] " so it can be told apart from the application's own output.
// Debug() only produces output when the library is compiled with DEBUG defined.
#ifdef DEBUG
#define Debug(formatString, ...) \
    fprintf(stderr, "[ptrace] DEBUG: " formatString, ##__VA_ARGS__)
#else
// Expand to a no-op expression (instead of nothing) so that `Debug(...);`
// is always a complete statement, e.g. as the sole body of an `if`.
#define Debug(formatString, ...) ((void)0)
#endif
#define Print(formatString, ...) \
    fprintf(stderr, "[ptrace] " formatString, ##__VA_ARGS__)
#define Error(formatString, ...) \
    fprintf(stderr, "[ptrace] ERROR: " formatString, ##__VA_ARGS__)
// Number of elements in an array. Only valid for true arrays, not pointers.
// The argument is parenthesized so arbitrary expressions can be passed safely.
#define N_ELEMS(x) (sizeof(x) / sizeof((x)[0]))
// Maximum number of stack frames captured for one backtrace.
#define BACKTRACE_DEPTH 20
// List the signals for which a backtrace should be generated if
// received by the application.
#define OVERRIDE_SIGNALS \
X(SIGUSR1) \
X(SIGINT) \
X(SIGQUIT) \
X(SIGILL) \
X(SIGFPE) \
X(SIGSEGV) \
X(SIGBUS)
// Just returning from a signal handler might cause an infinite loop
// for some signals. If such signals are overridden (i.e. listed above)
// list them below. After handling such a signal exit(EXIT_FAILURE)
// will be called to avoid this undesired behaviour.
#define ABORT_SIGNALS \
X(SIGINT) \
X(SIGQUIT) \
X(SIGILL) \
X(SIGFPE) \
X(SIGSEGV) \
X(SIGBUS)
// X-macro expansion, pass 1: X(sig) yields the signal constant itself, so the
// arrays below hold the numeric signal values in list order.
#define X(sig) sig,
int g_overrideSignals[] = { OVERRIDE_SIGNALS };
int g_abortSignals[] = { ABORT_SIGNALS };
#undef X
// X-macro expansion, pass 2: X(sig) stringizes the signal name. Entry i of each
// string array names entry i of the matching numeric array above, because both
// are generated from the same list.
#define X(sig) #sig,
char * g_overrideSignalsStr[] = { OVERRIDE_SIGNALS };
char * g_abortSignalsStr[] = { ABORT_SIGNALS };
#undef X
/*****************************************************************************
* declaration of functions
*****************************************************************************/
// Installs PtraceSignalHandler for every signal listed in g_overrideSignals.
void PtraceInit();
// Signal handler (three-argument sigaction form): prints the received signal
// and a backtrace to stderr. Declared with C linkage.
extern "C" void PtraceSignalHandler(int signalNumber, siginfo_t * signalInfo, void * context);
/*****************************************************************************
* functions called on library load and unload
*****************************************************************************/
/*
 * Library constructor: runs when the shared object is loaded.
 * In DEBUG builds it first lists the overridden and the aborting signals
 * on stderr, then installs the signal handlers via PtraceInit().
 */
static void __attribute__((constructor)) _ptrace_initialize()
{
    Debug("initializing\n");

#ifdef DEBUG
    const unsigned int overrideCount = N_ELEMS(g_overrideSignals);
    const unsigned int abortCount = N_ELEMS(g_abortSignals);
    unsigned int idx;

    // One line with all signals that get a backtrace handler.
    fprintf(stderr, "[ptrace] DEBUG: overriding: ");
    for (idx = 0; idx != overrideCount; idx++) {
        fprintf(stderr, "%s (%d) ", g_overrideSignalsStr[idx], g_overrideSignals[idx]);
    }
    fprintf(stderr, "\n");

    // One line with all signals that terminate the process after the backtrace.
    fprintf(stderr, "[ptrace] DEBUG: aborting: ");
    for (idx = 0; idx != abortCount; idx++) {
        fprintf(stderr, "%s (%d) ", g_abortSignalsStr[idx], g_abortSignals[idx]);
    }
    fprintf(stderr, "\n");
#endif /* DEBUG */

    PtraceInit();

    Debug("initializing done\n");
}
/*
 * Library destructor: runs when the shared object is unloaded.
 * Nothing has to be released; it only emits DEBUG trace messages.
 */
static void __attribute__((destructor)) _ptrace_finalize()
{
    Debug("finalizing\n");
    Debug("finalizing done\n");
}
/*****************************************************************************
* definition of functions
*****************************************************************************/
void PtraceInit()
{
struct sigaction action;
memset(&action, 0, sizeof(action));
action.sa_sigaction = PtraceSignalHandler;
sigfillset(&action.sa_mask);
action.sa_flags = SA_SIGINFO | SA_NODEFER;
int error;
for (unsigned int i = 0; i < N_ELEMS(g_overrideSignals); ++i) {
error = sigaction(g_overrideSignals[i], &action, NULL);
if (error == -1) {
Error("Installing signal handler for signal %s (%d) failed: error %d.\n",
g_overrideSignalsStr[i], g_overrideSignals[i], error);
exit(EXIT_FAILURE);
}
}
return;
}
/*
 * Signal handler: prints the received signal and a backtrace of the current
 * stack (at most BACKTRACE_DEPTH frames) to stderr.
 *
 * signalNumber  number of the signal being handled
 * signalInfo    unused
 * context       unused
 *
 * If the signal is listed in g_abortSignals the process is terminated with
 * exit(EXIT_FAILURE) afterwards; otherwise the handler returns and the
 * application continues.
 *
 * NOTE(review): fprintf(), backtrace_symbols() and exit() are not on the POSIX
 * list of async-signal-safe functions; this is kept as is because the tool's
 * output format and the destructor's debug output depend on it.
 */
void PtraceSignalHandler(int signalNumber, siginfo_t * signalInfo, void * context)
{
    // Required by the SA_SIGINFO handler signature but not needed here.
    (void)signalInfo;
    (void)context;

    // Translate the signal number into its name for the log message.
    const char * signalName = "unknown";
    for (unsigned int i = 0; i < N_ELEMS(g_overrideSignals); ++i) {
        if (g_overrideSignals[i] == signalNumber) {
            signalName = g_overrideSignalsStr[i];
            break;
        }
    }
    Print("Received signal %s (%d).\n", signalName, signalNumber);

    void * addresses[BACKTRACE_DEPTH];
    const int nAddresses = backtrace(addresses, BACKTRACE_DEPTH);
    char ** symbols = backtrace_symbols(addresses, nAddresses);
    if (symbols == NULL) {
        Error("Retrieving symbols failed.\n");
        exit(EXIT_FAILURE);
    }
    for (int k = 0; k < nAddresses; ++k) {
        Print("%d: %s\n", k, symbols[k]);
    }
    // backtrace_symbols() returns a single malloc()ed array that the caller
    // owns. Without this, every non-fatal signal (e.g. SIGUSR1) leaks it.
    free(symbols);

    // Returning from the handler for these signals could re-raise them
    // endlessly, so terminate the process instead.
    for (unsigned int i = 0; i < N_ELEMS(g_abortSignals); ++i) {
        if (signalNumber == g_abortSignals[i]) {
            Debug("Calling exit() after handled signal %s (%d).\n",
                  g_abortSignalsStr[i], signalNumber);
            exit(EXIT_FAILURE);
        }
    }
}
Makefile
# Builds the preloadable backtrace library.
#   make               -> ptrace.so
#   make TARGET=debug  -> ptrace-dbg.so (compiled with -DDEBUG)
CXX = g++
CXXFLAGS = -O2 -Wall -shared -fPIC
PPFLAGS =
D = -D
EXE_SUFFIX =
ifeq (debug,$(TARGET))
PPFLAGS += $(D)DEBUG
EXE_SUFFIX = -dbg
endif
PTRACE = ptrace$(EXE_SUFFIX).so
# The special target name is case-sensitive: it must be .PHONY.
.PHONY: clean
$(PTRACE): PTrace.cpp
	$(CXX) $(CXXFLAGS) $(PPFLAGS) $< -o $@
# Remove both library variants regardless of the TARGET passed to make.
clean:
	rm -f *.o
	rm -f ptrace.so ptrace-dbg.so
[Update: A colleague pointed out that nearly the same can be achieved by using libSegFault.so in combination with LD_PRELOAD to get a stack trace, register dump and a memory map. Using catchsegv gives the same results, but without the need to use LD_PRELOAD.]