Only restore the signal handler if sigaction has not changed
Restoring the signal handler in ExceptionHandler::SignalHandler() can lead to a race in scenarios where multiple threads crash within a short time. This can cause threads to alternately try to write a minidump without ever terminating the process. The first thread to write a minidump will reset the signal handler to the SIG_DFL using signal() in InstallDefaultHandler(). The next thread to execute SignalHandler() will detect this and will reset the signal handler to SignalHandler(). If the first thread takes too long to write its minidump (e.g. when there are many threads), the chances increase that the second thread will enter SignalHandler() before the first one leaves the critical section. After resetting the signal handler, the second thread will fail to write a minidump (since the file already exists) and will try to reset the signal handler to the default by calling RestoreHandlersLocked(). However, in the meantime the first thread will have entered SignalHandler() again and will overwrite it one more time. After that, no further attempts will be made to restore the default signal handler and both threads will continue to re-raise the signal and attempt to write minidump files. This change adds a check to make sure that cur_handler.sa_sigaction is still pointing to SignalHandler() before re-installing the handler. To test this we start a large number of sleeping threads and two threads that will crash simultaneously. Without the fix, this would reproducibly lead to a loop between the two crashing threads. Bug: 752 Change-Id: I784328cfff17ddc7476d6668354570ab867ba405 Reviewed-on: https://chromium-review.googlesource.com/855137 Reviewed-by: Mike Frysinger <vapier@chromium.org>
This commit is contained in:
parent
ceef4a517d
commit
97a9883676
2 changed files with 60 additions and 0 deletions
|
@ -353,6 +353,7 @@ void ExceptionHandler::SignalHandler(int sig, siginfo_t* info, void* uc) {
|
|||
// will call the function with the right arguments.
|
||||
struct sigaction cur_handler;
|
||||
if (sigaction(sig, NULL, &cur_handler) == 0 &&
|
||||
cur_handler.sa_sigaction == SignalHandler &&
|
||||
(cur_handler.sa_flags & SA_SIGINFO) == 0) {
|
||||
// Reset signal handler with the right flags.
|
||||
sigemptyset(&cur_handler.sa_mask);
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
|
@ -257,6 +258,64 @@ TEST(ExceptionHandlerTest, ChildCrashWithFD) {
|
|||
ASSERT_NO_FATAL_FAILURE(ChildCrash(true));
|
||||
}
|
||||
|
||||
static void* SleepFunction(void* unused) {
|
||||
while (true) usleep(1000000);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void* CrashFunction(void* b_ptr) {
|
||||
pthread_barrier_t* b = reinterpret_cast<pthread_barrier_t*>(b_ptr);
|
||||
pthread_barrier_wait(b);
|
||||
DoNullPointerDereference();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Tests that concurrent crashes do not enter a loop by alternately triggering
|
||||
// the signal handler.
|
||||
TEST(ExceptionHandlerTest, ParallelChildCrashesDontHang) {
|
||||
AutoTempDir temp_dir;
|
||||
const pid_t child = fork();
|
||||
if (child == 0) {
|
||||
google_breakpad::scoped_ptr<ExceptionHandler> handler(
|
||||
new ExceptionHandler(MinidumpDescriptor(temp_dir.path()), NULL, NULL,
|
||||
NULL, true, -1));
|
||||
|
||||
// We start a number of threads to make sure handling the signal takes
|
||||
// enough time for the second thread to enter the signal handler.
|
||||
int num_sleep_threads = 100;
|
||||
google_breakpad::scoped_array<pthread_t> sleep_threads(
|
||||
new pthread_t[num_sleep_threads]);
|
||||
for (int i = 0; i < num_sleep_threads; ++i) {
|
||||
ASSERT_EQ(0, pthread_create(&sleep_threads[i], NULL, SleepFunction,
|
||||
NULL));
|
||||
}
|
||||
|
||||
int num_crash_threads = 2;
|
||||
google_breakpad::scoped_array<pthread_t> crash_threads(
|
||||
new pthread_t[num_crash_threads]);
|
||||
// Barrier to synchronize crashing both threads at the same time.
|
||||
pthread_barrier_t b;
|
||||
ASSERT_EQ(0, pthread_barrier_init(&b, NULL, num_crash_threads + 1));
|
||||
for (int i = 0; i < num_crash_threads; ++i) {
|
||||
ASSERT_EQ(0, pthread_create(&crash_threads[i], NULL, CrashFunction, &b));
|
||||
}
|
||||
pthread_barrier_wait(&b);
|
||||
for (int i = 0; i < num_crash_threads; ++i) {
|
||||
ASSERT_EQ(0, pthread_join(crash_threads[i], NULL));
|
||||
}
|
||||
}
|
||||
|
||||
// Wait a while until the child should have crashed.
|
||||
usleep(100000);
|
||||
// Kill the child if it is still running.
|
||||
kill(child, SIGKILL);
|
||||
|
||||
// If the child process terminated by itself, it will have returned SIGSEGV.
|
||||
// If however it got stuck in a loop, it will have been killed by the
|
||||
// SIGKILL.
|
||||
ASSERT_NO_FATAL_FAILURE(WaitForProcessToTerminate(child, SIGSEGV));
|
||||
}
|
||||
|
||||
static bool DoneCallbackReturnFalse(const MinidumpDescriptor& descriptor,
|
||||
void* context,
|
||||
bool succeeded) {
|
||||
|
|
Loading…
Reference in a new issue