Only restore the signal handler if sigaction has not changed

Restoring the signal handler in ExceptionHandler::SignalHandler() can
lead to a race in scenarios where multiple threads crash within a short
time. This can cause threads to alternately try to write a minidump
without ever terminating the process.

The first thread to write a minidump will reset the signal handler to
the SIG_DFL using signal() in InstallDefaultHandler(). The next thread
to execute SignalHandler() will detect this and will reset the signal
handler to SignalHandler(). If the first thread takes too long to write
its minidump (e.g. when there are many threads), the chances increase
that the second thread will enter SignalHandler() before the first one
leaves the critical section.

After resetting the signal handler, the second thread will fail to write
a minidump (since the file already exists) and will try to reset the
signal handler to the default by calling RestoreHandlersLocked().
However, in the meantime the first thread will have entered
SignalHandler() again and will overwrite it one more time.

After that, no further attempts will be made to restore the default
signal handler and both threads will continue to re-raise the signal and
attempt to write minidump files.

This change adds a check to make sure that cur_handler.sa_sigaction is
still pointing to SignalHandler() before re-installing the handler.

To test this we start a large number of sleeping threads and two threads
that will crash simultaneously. Without the fix, this would reproducibly
lead to a loop between the two crashing threads.

Bug: 752
Change-Id: I784328cfff17ddc7476d6668354570ab867ba405
Reviewed-on: https://chromium-review.googlesource.com/855137
Reviewed-by: Mike Frysinger <vapier@chromium.org>
This commit is contained in:
Lars Volker 2018-01-08 14:09:07 -08:00 committed by Mike Frysinger
parent ceef4a517d
commit 97a9883676
2 changed files with 60 additions and 0 deletions

View file

@ -353,6 +353,7 @@ void ExceptionHandler::SignalHandler(int sig, siginfo_t* info, void* uc) {
// will call the function with the right arguments.
struct sigaction cur_handler;
if (sigaction(sig, NULL, &cur_handler) == 0 &&
cur_handler.sa_sigaction == SignalHandler &&
(cur_handler.sa_flags & SA_SIGINFO) == 0) {
// Reset signal handler with the right flags.
sigemptyset(&cur_handler.sa_mask);

View file

@ -27,6 +27,7 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
#include <signal.h>
@ -257,6 +258,64 @@ TEST(ExceptionHandlerTest, ChildCrashWithFD) {
ASSERT_NO_FATAL_FAILURE(ChildCrash(true));
}
static void* SleepFunction(void* unused) {
while (true) usleep(1000000);
return NULL;
}
static void* CrashFunction(void* b_ptr) {
pthread_barrier_t* b = reinterpret_cast<pthread_barrier_t*>(b_ptr);
pthread_barrier_wait(b);
DoNullPointerDereference();
return NULL;
}
// Tests that concurrent crashes do not enter a loop by alternately triggering
// the signal handler.
TEST(ExceptionHandlerTest, ParallelChildCrashesDontHang) {
AutoTempDir temp_dir;
const pid_t child = fork();
if (child == 0) {
google_breakpad::scoped_ptr<ExceptionHandler> handler(
new ExceptionHandler(MinidumpDescriptor(temp_dir.path()), NULL, NULL,
NULL, true, -1));
// We start a number of threads to make sure handling the signal takes
// enough time for the second thread to enter the signal handler.
int num_sleep_threads = 100;
google_breakpad::scoped_array<pthread_t> sleep_threads(
new pthread_t[num_sleep_threads]);
for (int i = 0; i < num_sleep_threads; ++i) {
ASSERT_EQ(0, pthread_create(&sleep_threads[i], NULL, SleepFunction,
NULL));
}
int num_crash_threads = 2;
google_breakpad::scoped_array<pthread_t> crash_threads(
new pthread_t[num_crash_threads]);
// Barrier to synchronize crashing both threads at the same time.
pthread_barrier_t b;
ASSERT_EQ(0, pthread_barrier_init(&b, NULL, num_crash_threads + 1));
for (int i = 0; i < num_crash_threads; ++i) {
ASSERT_EQ(0, pthread_create(&crash_threads[i], NULL, CrashFunction, &b));
}
pthread_barrier_wait(&b);
for (int i = 0; i < num_crash_threads; ++i) {
ASSERT_EQ(0, pthread_join(crash_threads[i], NULL));
}
}
// Wait a while until the child should have crashed.
usleep(100000);
// Kill the child if it is still running.
kill(child, SIGKILL);
// If the child process terminated by itself, it will have returned SIGSEGV.
// If however it got stuck in a loop, it will have been killed by the
// SIGKILL.
ASSERT_NO_FATAL_FAILURE(WaitForProcessToTerminate(child, SIGSEGV));
}
static bool DoneCallbackReturnFalse(const MinidumpDescriptor& descriptor,
void* context,
bool succeeded) {