From 01431c2f61aa2af1804f1e139da9bc7c4afa9e7b Mon Sep 17 00:00:00 2001
From: Leonard Mosescu
Date: Tue, 11 Jul 2017 12:26:50 -0700
Subject: [PATCH] Handle very large stack traces

The main motivation for this change is to handle very large stack traces,
normally the result of infinite recursion. This part is actually fairly
simple: relaxing a few self-imposed limits on how many frames we can unwind
and the maximum size for stack memory.

Relaxing these limits requires stricter and more consistent checks for
stack unwinding. There are a number of unwinding invariants that apply to
all the platforms:

1. the stack pointer (and frame pointer) must be within the stack memory
   (the frame pointer, if present, must point to the right frame too)
2. unwinding must monotonically increase SP (and, except for the first
   frame unwind, the increase must be strict)
3. the instruction pointer (return address) must point to a valid location
4. the stack pointer (and frame pointer) must be appropriately aligned

This change is focused on 2), which is enough to guarantee that the
unwinding doesn't get stuck in an infinite loop.

1) is implicitly validated as part of accessing the stack memory (explicit
checks might be nice, though). 4) is ABI-specific and, while it may be
valuable in catching suspicious frames, it is not in the scope of this
change. 3) is also an interesting check, but thanks to just-in-time
compilation it's more complex than just calling
Stackwalker::InstructionAddressSeemsValid(), and we don't want to drop
parts of the callstack due to an overly conservative check.

Bug: chromium:735989
Change-Id: I9aaba77c7fd028942d77c87d51b5e6f94e136ddd
Reviewed-on: https://chromium-review.googlesource.com/563771
Reviewed-by: Mark Mentovai
Reviewed-by: Ivan Penkov
---
 src/google_breakpad/processor/minidump.h      |  3 +-
 src/google_breakpad/processor/stackwalker.h   | 17 +++++++-
 src/processor/microdump_processor_unittest.cc |  4 +-
 src/processor/minidump.cc                     |  2 +-
 src/processor/stackwalker.cc                  | 41 ++++++++++++++++---
 src/processor/stackwalker_amd64.cc            | 30 +++-----------
 src/processor/stackwalker_amd64.h             |  8 ----
 src/processor/stackwalker_arm.cc              | 16 +++-----
 src/processor/stackwalker_arm64.cc            | 15 +++----
 src/processor/stackwalker_mips.cc             | 15 +++----
 src/processor/stackwalker_ppc.cc              |  8 ++++
 src/processor/stackwalker_ppc64.cc            |  8 ++++
 src/processor/stackwalker_sparc.cc            |  8 ++++
 src/processor/stackwalker_x86.cc              | 14 +++----
 14 files changed, 108 insertions(+), 81 deletions(-)

diff --git a/src/google_breakpad/processor/minidump.h b/src/google_breakpad/processor/minidump.h
index 2651a323..34a2a44d 100644
--- a/src/google_breakpad/processor/minidump.h
+++ b/src/google_breakpad/processor/minidump.h
@@ -257,8 +257,7 @@ class MinidumpMemoryRegion : public MinidumpObject,
   bool hexdump_;
   unsigned int hexdump_width_;
 
-  // The largest memory region that will be read from a minidump. The
-  // default is 1MB.
+  // The largest memory region that will be read from a minidump.
   static uint32_t max_bytes_;
 
   // Base address and size of the memory region, and its position in the
diff --git a/src/google_breakpad/processor/stackwalker.h b/src/google_breakpad/processor/stackwalker.h
index 4378f75b..0c458d50 100644
--- a/src/google_breakpad/processor/stackwalker.h
+++ b/src/google_breakpad/processor/stackwalker.h
@@ -126,7 +126,15 @@ class Stackwalker {
   //  * This address is within a loaded module for which we have symbols,
   //    and falls inside a function in that module.
   // Returns false otherwise.
-  bool InstructionAddressSeemsValid(uint64_t address);
+  bool InstructionAddressSeemsValid(uint64_t address) const;
+
+  // Checks whether we should stop the stack trace
+  // (either we reached the end-of-stack or we detected a
+  // broken callstack invariant).
+  bool TerminateWalk(uint64_t caller_ip,
+                     uint64_t caller_sp,
+                     uint64_t callee_sp,
+                     bool first_unwind) const;
 
   // The default number of words to search through on the stack
   // for a return address.
@@ -217,6 +225,13 @@ class Stackwalker {
   // the caller. |stack_scan_allowed| controls whether stack scanning is
   // an allowable frame-recovery method, since it is desirable to be able to
   // disable stack scanning in performance-critical use cases.
+  //
+  // CONSIDER: a way to differentiate between:
+  //   - full stack traces
+  //   - explicitly truncated traces (max_frames_)
+  //   - stopping after max scanned frames
+  //   - failed stack walk (breaking one of the stack walk invariants)
+  //
   virtual StackFrame* GetCallerFrame(const CallStack* stack,
                                      bool stack_scan_allowed) = 0;
 
diff --git a/src/processor/microdump_processor_unittest.cc b/src/processor/microdump_processor_unittest.cc
index af897f7d..238b048c 100644
--- a/src/processor/microdump_processor_unittest.cc
+++ b/src/processor/microdump_processor_unittest.cc
@@ -132,7 +132,7 @@ TEST_F(MicrodumpProcessorTest, TestProcess_MissingSymbols) {
   ASSERT_EQ("arm64", state.system_info()->cpu);
   ASSERT_EQ("OS 64 VERSION INFO", state.system_info()->os_version);
   ASSERT_EQ(1U, state.threads()->size());
-  ASSERT_EQ(12U, state.threads()->at(0)->frames()->size());
+  ASSERT_EQ(11U, state.threads()->at(0)->frames()->size());
   ASSERT_EQ("",
             state.threads()->at(0)->frames()->at(0)->function_name);
@@ -205,7 +205,7 @@ TEST_F(MicrodumpProcessorTest, TestProcessX86) {
   ASSERT_EQ("x86", state.system_info()->cpu);
   ASSERT_EQ("asus/WW_Z00A/Z00A:5.0/LRX21V/2.19.40.22_20150627_5104_user:user/"
             "release-keys", state.system_info()->os_version);
-  ASSERT_EQ(56U, state.threads()->at(0)->frames()->size());
+  ASSERT_EQ(17U, state.threads()->at(0)->frames()->size());
   ASSERT_EQ("libc.so",
             state.threads()->at(0)->frames()->at(0)->module->debug_file());
   // TODO(mmandlis): Get symbols for the test X86 microdump and test function
diff --git a/src/processor/minidump.cc b/src/processor/minidump.cc
index 430d49d9..d2d69202 100644
--- a/src/processor/minidump.cc
+++ b/src/processor/minidump.cc
@@ -1195,7 +1195,7 @@ bool MinidumpContext::CheckAgainstSystemInfo(uint32_t context_cpu_type) {
 //
-uint32_t MinidumpMemoryRegion::max_bytes_ = 2 * 1024 * 1024;  // 2MB
+uint32_t MinidumpMemoryRegion::max_bytes_ = 64 * 1024 * 1024;  // 64MB
 
 MinidumpMemoryRegion::MinidumpMemoryRegion(Minidump* minidump)
diff --git a/src/processor/stackwalker.cc b/src/processor/stackwalker.cc
index c85ce5e8..4988ef1e 100644
--- a/src/processor/stackwalker.cc
+++ b/src/processor/stackwalker.cc
@@ -60,10 +60,15 @@ namespace google_breakpad {
 
 const int Stackwalker::kRASearchWords = 40;
 
-uint32_t Stackwalker::max_frames_ = 1024;
+// This default is just a sanity check: a large enough value
+// that allows capturing unbounded recursion traces, yet provides a
+// guardrail against stack walking bugs. The stack walking invariants
+// guarantee that the unwinding process is strictly monotonic and
+// practically bounded by the size of the stack memory range.
+uint32_t Stackwalker::max_frames_ = 1 << 20; // 1M bool Stackwalker::max_frames_set_ = false; -uint32_t Stackwalker::max_frames_scanned_ = 1024; +uint32_t Stackwalker::max_frames_scanned_ = 1 << 14; // 16k Stackwalker::Stackwalker(const SystemInfo* system_info, MemoryRegion* memory, @@ -234,7 +239,7 @@ Stackwalker* Stackwalker::StackwalkerForCPU( context->GetContextSPARC(), memory, modules, frame_symbolizer); break; - + case MD_CONTEXT_MIPS: case MD_CONTEXT_MIPS64: cpu_stackwalker = new StackwalkerMIPS(system_info, @@ -253,7 +258,7 @@ Stackwalker* Stackwalker::StackwalkerForCPU( frame_symbolizer); break; } - + case MD_CONTEXT_ARM64: cpu_stackwalker = new StackwalkerARM64(system_info, context->GetContextARM64(), @@ -271,7 +276,33 @@ Stackwalker* Stackwalker::StackwalkerForCPU( return cpu_stackwalker; } -bool Stackwalker::InstructionAddressSeemsValid(uint64_t address) { +// CONSIDER: check stack alignment? +bool Stackwalker::TerminateWalk(uint64_t caller_ip, + uint64_t caller_sp, + uint64_t callee_sp, + bool first_unwind) const { + // Treat an instruction address less than 4k as end-of-stack. + // (using InstructionAddressSeemsValid() here is very tempting, + // but we need to handle JITted code) + if (caller_ip < (1 << 12)) { + return true; + } + + // NOTE: The stack address range is implicitly checked + // when the stack memory is accessed. + + // The stack pointer should monotonically increase. For first unwind + // we allow caller_sp == callee_sp to account for architectures where + // the return address is stored in a register (so it's possible to have + // leaf functions which don't move the stack pointer) + if (first_unwind ? (caller_sp < callee_sp) : (caller_sp <= callee_sp)) { + return true; + } + + return false; +} + +bool Stackwalker::InstructionAddressSeemsValid(uint64_t address) const { StackFrame frame; frame.instruction = address; StackFrameSymbolizer::SymbolizerResult symbolizer_result = diff --git a/src/processor/stackwalker_amd64.cc b/src/processor/stackwalker_amd64.cc index d1333248..d5ac6c65 100644 --- a/src/processor/stackwalker_amd64.cc +++ b/src/processor/stackwalker_amd64.cc @@ -147,23 +147,6 @@ StackFrameAMD64* StackwalkerAMD64::GetCallerByCFIFrameInfo( return frame.release(); } -bool StackwalkerAMD64::IsEndOfStack(uint64_t caller_rip, uint64_t caller_rsp, - uint64_t callee_rsp) { - // Treat an instruction address of 0 as end-of-stack. - if (caller_rip == 0) { - return true; - } - - // If the new stack pointer is at a lower address than the old, then - // that's clearly incorrect. Treat this as end-of-stack to enforce - // progress and avoid infinite loops. - if (caller_rsp < callee_rsp) { - return true; - } - - return false; -} - // Returns true if `ptr` is not in x86-64 canonical form. // https://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details static bool is_non_canonical(uint64_t ptr) { @@ -173,7 +156,6 @@ static bool is_non_canonical(uint64_t ptr) { StackFrameAMD64* StackwalkerAMD64::GetCallerByFramePointerRecovery( const vector& frames) { StackFrameAMD64* last_frame = static_cast(frames.back()); - uint64_t last_rsp = last_frame->context.rsp; uint64_t last_rbp = last_frame->context.rbp; // Assume the presence of a frame pointer. This is not mandated by the @@ -208,10 +190,8 @@ StackFrameAMD64* StackwalkerAMD64::GetCallerByFramePointerRecovery( return NULL; } - // Simple sanity check that the stack is growing downwards as expected. 
- if (IsEndOfStack(caller_rip, caller_rsp, last_rsp) || - caller_rbp < last_rbp) { - // Reached end-of-stack or stack is not growing downwards. + // Check that rbp is within the right frame + if (caller_rsp <= last_rbp || caller_rbp < caller_rsp) { return NULL; } @@ -327,9 +307,9 @@ StackFrame* StackwalkerAMD64::GetCallerFrame(const CallStack* stack, new_frame->context.rbp = static_cast(new_frame->context.rbp); } - if (IsEndOfStack(new_frame->context.rip, new_frame->context.rsp, - last_frame->context.rsp)) { - // Reached end-of-stack. + // Should we terminate the stack walk? (end-of-stack or broken invariant) + if (TerminateWalk(new_frame->context.rip, new_frame->context.rsp, + last_frame->context.rsp, frames.size() == 1)) { return NULL; } diff --git a/src/processor/stackwalker_amd64.h b/src/processor/stackwalker_amd64.h index 67c45510..8f3dbd52 100644 --- a/src/processor/stackwalker_amd64.h +++ b/src/processor/stackwalker_amd64.h @@ -78,14 +78,6 @@ class StackwalkerAMD64 : public Stackwalker { StackFrameAMD64* GetCallerByCFIFrameInfo(const vector &frames, CFIFrameInfo* cfi_frame_info); - // Checks whether end-of-stack is reached. An instruction address of 0 is an - // end-of-stack marker. If the stack pointer of the caller is at a lower - // address than the stack pointer of the callee, then that's clearly incorrect - // and it is treated as end-of-stack to enforce progress and avoid infinite - // loops. - bool IsEndOfStack(uint64_t caller_rip, uint64_t caller_rsp, - uint64_t callee_rsp); - // Assumes a traditional frame layout where the frame pointer has not been // omitted. The expectation is that caller's %rbp is pushed to the stack // after the return address of the callee, and that the callee's %rsp can diff --git a/src/processor/stackwalker_arm.cc b/src/processor/stackwalker_arm.cc index e4fc5869..dabb4fd2 100644 --- a/src/processor/stackwalker_arm.cc +++ b/src/processor/stackwalker_arm.cc @@ -267,17 +267,13 @@ StackFrame* StackwalkerARM::GetCallerFrame(const CallStack* stack, if (!frame.get()) return NULL; - - // An instruction address of zero marks the end of the stack. - if (frame->context.iregs[MD_CONTEXT_ARM_REG_PC] == 0) - return NULL; - - // If the new stack pointer is at a lower address than the old, then - // that's clearly incorrect. Treat this as end-of-stack to enforce - // progress and avoid infinite loops. - if (frame->context.iregs[MD_CONTEXT_ARM_REG_SP] - < last_frame->context.iregs[MD_CONTEXT_ARM_REG_SP]) + // Should we terminate the stack walk? (end-of-stack or broken invariant) + if (TerminateWalk(frame->context.iregs[MD_CONTEXT_ARM_REG_PC], + frame->context.iregs[MD_CONTEXT_ARM_REG_SP], + last_frame->context.iregs[MD_CONTEXT_ARM_REG_SP], + frames.size() == 1)) { return NULL; + } // The new frame's context's PC is the return address, which is one // instruction past the instruction that caused us to arrive at the diff --git a/src/processor/stackwalker_arm64.cc b/src/processor/stackwalker_arm64.cc index 31119a97..f9660112 100644 --- a/src/processor/stackwalker_arm64.cc +++ b/src/processor/stackwalker_arm64.cc @@ -252,16 +252,13 @@ StackFrame* StackwalkerARM64::GetCallerFrame(const CallStack* stack, if (!frame.get()) return NULL; - // An instruction address of zero marks the end of the stack. - if (frame->context.iregs[MD_CONTEXT_ARM64_REG_PC] == 0) - return NULL; - - // If the new stack pointer is at a lower address than the old, then - // that's clearly incorrect. Treat this as end-of-stack to enforce - // progress and avoid infinite loops. 
- if (frame->context.iregs[MD_CONTEXT_ARM64_REG_SP] - < last_frame->context.iregs[MD_CONTEXT_ARM64_REG_SP]) + // Should we terminate the stack walk? (end-of-stack or broken invariant) + if (TerminateWalk(frame->context.iregs[MD_CONTEXT_ARM64_REG_PC], + frame->context.iregs[MD_CONTEXT_ARM64_REG_SP], + last_frame->context.iregs[MD_CONTEXT_ARM64_REG_SP], + frames.size() == 1)) { return NULL; + } // The new frame's context's PC is the return address, which is one // instruction past the instruction that caused us to arrive at the callee. diff --git a/src/processor/stackwalker_mips.cc b/src/processor/stackwalker_mips.cc index 9a81b46e..c33ecdbe 100644 --- a/src/processor/stackwalker_mips.cc +++ b/src/processor/stackwalker_mips.cc @@ -272,16 +272,11 @@ StackFrame* StackwalkerMIPS::GetCallerFrame(const CallStack* stack, return NULL; } - // Treat an instruction address of 0 as end-of-stack. - if (new_frame->context.epc == 0) { - return NULL; - } - - // If the new stack pointer is at a lower address than the old, then - // that's clearly incorrect. Treat this as end-of-stack to enforce - // progress and avoid infinite loops. - if (new_frame->context.iregs[MD_CONTEXT_MIPS_REG_SP] < - last_frame->context.iregs[MD_CONTEXT_MIPS_REG_SP]) { + // Should we terminate the stack walk? (end-of-stack or broken invariant) + if (TerminateWalk(new_frame->context.epc, + new_frame->context.iregs[MD_CONTEXT_MIPS_REG_SP], + last_frame->context.iregs[MD_CONTEXT_MIPS_REG_SP], + frames.size() == 1)) { return NULL; } diff --git a/src/processor/stackwalker_ppc.cc b/src/processor/stackwalker_ppc.cc index 7e208844..c9ab144f 100644 --- a/src/processor/stackwalker_ppc.cc +++ b/src/processor/stackwalker_ppc.cc @@ -130,6 +130,14 @@ StackFrame* StackwalkerPPC::GetCallerFrame(const CallStack* stack, StackFramePPC::CONTEXT_VALID_GPR1; frame->trust = StackFrame::FRAME_TRUST_FP; + // Should we terminate the stack walk? (end-of-stack or broken invariant) + if (TerminateWalk(instruction, + stack_pointer, + last_frame->context.gpr[1], + stack->frames()->size() == 1)) { + return NULL; + } + // frame->context.srr0 is the return address, which is one instruction // past the branch that caused us to arrive at the callee. Set // frame_ppc->instruction to four less than that. Since all ppc diff --git a/src/processor/stackwalker_ppc64.cc b/src/processor/stackwalker_ppc64.cc index 51c71fe5..4fd9e740 100644 --- a/src/processor/stackwalker_ppc64.cc +++ b/src/processor/stackwalker_ppc64.cc @@ -121,6 +121,14 @@ StackFrame* StackwalkerPPC64::GetCallerFrame(const CallStack* stack, StackFramePPC64::CONTEXT_VALID_GPR1; frame->trust = StackFrame::FRAME_TRUST_FP; + // Should we terminate the stack walk? (end-of-stack or broken invariant) + if (TerminateWalk(instruction, + stack_pointer, + last_frame->context.gpr[1], + stack->frames()->size() == 1)) { + return NULL; + } + // frame->context.srr0 is the return address, which is one instruction // past the branch that caused us to arrive at the callee. Set // frame_ppc64->instruction to eight less than that. Since all ppc64 diff --git a/src/processor/stackwalker_sparc.cc b/src/processor/stackwalker_sparc.cc index ff2ea75a..4de838af 100644 --- a/src/processor/stackwalker_sparc.cc +++ b/src/processor/stackwalker_sparc.cc @@ -111,6 +111,14 @@ StackFrame* StackwalkerSPARC::GetCallerFrame(const CallStack* stack, return NULL; } + // Should we terminate the stack walk? 
(end-of-stack or broken invariant) + if (TerminateWalk(instruction, + stack_pointer, + last_frame->context.g_r[14], + stack->frames()->size() == 1)) { + return NULL; + } + StackFrameSPARC* frame = new StackFrameSPARC(); frame->context = last_frame->context; diff --git a/src/processor/stackwalker_x86.cc b/src/processor/stackwalker_x86.cc index 46e26cf9..ed2b383d 100644 --- a/src/processor/stackwalker_x86.cc +++ b/src/processor/stackwalker_x86.cc @@ -659,15 +659,13 @@ StackFrame* StackwalkerX86::GetCallerFrame(const CallStack* stack, if (!new_frame.get()) return NULL; - // Treat an instruction address of 0 as end-of-stack. - if (new_frame->context.eip == 0) - return NULL; - - // If the new stack pointer is at a lower address than the old, then - // that's clearly incorrect. Treat this as end-of-stack to enforce - // progress and avoid infinite loops. - if (new_frame->context.esp <= last_frame->context.esp) + // Should we terminate the stack walk? (end-of-stack or broken invariant) + if (TerminateWalk(new_frame->context.eip, + new_frame->context.esp, + last_frame->context.esp, + frames.size() == 1)) { return NULL; + } // new_frame->context.eip is the return address, which is the instruction // after the CALL that caused us to arrive at the callee. Set
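
Note (not part of the patch): the sketch below is a minimal, standalone model of the termination rule that the new Stackwalker::TerminateWalk() enforces. The ShouldTerminate() helper mirrors the checks in the patch, while the Frame struct and Walk() driver are hypothetical simplifications added purely for illustration; they are not Breakpad APIs.

// Illustrative sketch only: shows how the monotonic-SP invariant keeps a
// walk over a corrupt or recursive stack from looping forever.
#include <cstdint>
#include <iostream>
#include <vector>

namespace {

// Stop when the return address looks like end-of-stack (below 4 KiB) or
// when the stack pointer fails to make progress. Only the very first
// unwind may keep the same SP, to allow leaf functions that never push
// the return address.
bool ShouldTerminate(uint64_t caller_ip, uint64_t caller_sp,
                     uint64_t callee_sp, bool first_unwind) {
  if (caller_ip < (1 << 12))
    return true;  // end-of-stack marker (includes caller_ip == 0)
  if (first_unwind ? (caller_sp < callee_sp) : (caller_sp <= callee_sp))
    return true;  // SP must increase (strictly, after the first unwind)
  return false;
}

// Hypothetical simplified frame: just an instruction and a stack pointer.
struct Frame {
  uint64_t ip;
  uint64_t sp;
};

// Walks a pre-recovered list of candidate frames and stops at the first
// one that violates the invariants.
std::vector<Frame> Walk(const std::vector<Frame>& candidates) {
  std::vector<Frame> trace;
  for (const Frame& f : candidates) {
    if (!trace.empty() &&
        ShouldTerminate(f.ip, f.sp, trace.back().sp, trace.size() == 1))
      break;
    trace.push_back(f);
  }
  return trace;
}

}  // namespace

int main() {
  std::vector<Frame> candidates = {
      {0x400100, 0x7fff1000},  // context frame
      {0x400200, 0x7fff1000},  // leaf function: equal SP is OK on first unwind
      {0x400300, 0x7fff1000},  // no progress: the walk terminates here
  };
  std::cout << "kept " << Walk(candidates).size() << " frames\n";  // kept 2
  return 0;
}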