Skip to content
This repository has been archived by the owner on Feb 5, 2019. It is now read-only.

Commit

Permalink
Add stack overflow check for ARM Thumb instruction set.
Browse files Browse the repository at this point in the history
Besides the mechanical changes between the ARM and Thumb functions
required by the different instruction sets, there is a difference in how the
stack limit is located. The ARM version uses hardware that isn't available
on the lower-end Thumb processors (namely the system co-processor and MMU);
therefore the stack limit is placed at a predefined location in
memory - STACK_LIMIT. It is the responsibility of the wrapping runtime
to manage this location with the correct value. It can vary from a simple
constant defined by the linker to a variable actively managed by an RTOS
implementation.
  • Loading branch information
neykov authored and alexcrichton committed Feb 26, 2014
1 parent ed0d140 commit fd8a627
Show file tree
Hide file tree
Showing 4 changed files with 365 additions and 1 deletion.
178 changes: 177 additions & 1 deletion lib/Target/ARM/ARMFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "ARMFrameLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
Expand Down Expand Up @@ -1639,10 +1640,20 @@ static uint32_t AlignToARMConstant(uint32_t Value) {
// stack limit.
static const uint64_t kSplitStackAvailable = 256;

// Entry point for the split-stack prologue adjustment: selects the Thumb or
// the ARM implementation according to the subtarget of the function.
void
ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
  const ARMSubtarget &Subtarget = MF.getTarget().getSubtarget<ARMSubtarget>();

  // Non-Thumb functions take the ARM path; everything else is Thumb.
  if (!Subtarget.isThumb()) {
    adjustForSegmentedStacksARM(MF);
    return;
  }

  adjustForSegmentedStacksThumb(MF);
}

// Adjust function prologue to enable split stack.
// Only support android and linux.
void
ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
ARMFrameLowering::adjustForSegmentedStacksARM(MachineFunction &MF) const {
const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();

// Doesn't support vararg function.
Expand Down Expand Up @@ -1855,3 +1866,168 @@ ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.verify();
#endif
}

// Thumb variant of the split-stack prologue adjustment.
//
// Five new basic blocks are placed in front of the existing prologue block,
// in this order: prevStackMBB, mcrMBB, getMBB, allocMBB, postStackMBB.
// The emitted code
//   1. pushes r4/r5 so they can be used as scratch registers,
//   2. computes in r5 the lowest stack address the function needs,
//   3. loads the current stack limit through the external symbol
//      STACK_LIMIT (reached via a constant-pool entry),
//   4. branches to postStackMBB (which restores r4/r5 and continues into the
//      original prologue) when the limit is below that address,
//   5. otherwise calls __morestack with the requested sizes in r4/r5.
//
// Vararg functions are not supported and abort with a fatal error.
void
ARMFrameLowering::adjustForSegmentedStacksThumb(MachineFunction &MF) const {

// Doesn't support vararg function.
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");

MachineBasicBlock &prologueMBB = MF.front();
MachineFrameInfo* MFI = MF.getFrameInfo();
const ARMBaseInstrInfo &TII = *TM.getInstrInfo();
ARMFunctionInfo* ARMFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;

// R4 and R5 serve as scratch registers. They are saved before first use
// and restored on every path that leaves the inserted blocks.
unsigned ScratchReg0 = ARM::R4;
unsigned ScratchReg1 = ARM::R5;
uint64_t AlignedStackSize;

// Blocks of the inserted check sequence.
// (mcrMBB: the name is presumably carried over from the ARM variant; no
// co-processor instruction is emitted into it here.)
MachineBasicBlock* prevStackMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock* postStackMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock* allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock* getMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock* mcrMBB = MF.CreateMachineBasicBlock();

// Every register live into the original prologue is also live into each of
// the new blocks that now precede it.
for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
e = prologueMBB.livein_end(); i != e; ++i) {
allocMBB->addLiveIn(*i);
getMBB->addLiveIn(*i);
mcrMBB->addLiveIn(*i);
prevStackMBB->addLiveIn(*i);
postStackMBB->addLiveIn(*i);
}

// push_front in reverse, so the final layout is:
// prevStackMBB, mcrMBB, getMBB, allocMBB, postStackMBB, <original prologue>.
MF.push_front(postStackMBB);
MF.push_front(allocMBB);
MF.push_front(getMBB);
MF.push_front(mcrMBB);
MF.push_front(prevStackMBB);

// The required stack size, aligned by AlignToARMConstant.
uint64_t StackSize = MFI->getStackSize();

AlignedStackSize = AlignToARMConstant(StackSize);

// When the aligned frame size is below kSplitStackAvailable we just compare
// the stack boundary directly to the value of the stack pointer, per gcc.
bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

// Two callee-saved registers are used as scratch registers, so they must be
// saved to the stack before use.
// SR0 holds the stack limit and SR1 the lowest stack address requested;
// later both carry the arguments for __morestack().
// SR0: Scratch Register #0
// SR1: Scratch Register #1
// push {SR0, SR1}
AddDefaultPred(BuildMI(prevStackMBB, DL, TII.get(ARM::tPUSH)))
.addReg(ScratchReg0)
.addReg(ScratchReg1);

// mov SR1, sp
AddDefaultPred(BuildMI(mcrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
.addReg(ARM::SP));

if (!CompareStackPointer) {
// sub SR1, #StackSize
// NOTE(review): this path is taken only when AlignedStackSize is at least
// kSplitStackAvailable, while tSUBi8 presumably takes an 8-bit immediate --
// confirm that such values can actually be encoded.
AddDefaultPred(AddDefaultCC(BuildMI(mcrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
.addReg(ScratchReg1).addImm(AlignedStackSize));
}

// Constant-pool entry referring to the external symbol STACK_LIMIT, which
// is where this variant reads the stack limit from.
unsigned PCLabelId = ARMFI->createPICLabelUId();
ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::
Create(MF.getFunction()->getContext(), "STACK_LIMIT", PCLabelId, 0);
MachineConstantPool *MCP = MF.getConstantPool();
unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());

// ldr SR0, [pc, offset(STACK_LIMIT)]
AddDefaultPred(BuildMI(getMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
.addConstantPoolIndex(CPI));

// ldr SR0, [SR0]
AddDefaultPred(BuildMI(getMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
.addReg(ScratchReg0)
.addImm(0));

// Compare the stack limit with the lowest stack address requested.
// cmp SR0, SR1
AddDefaultPred(BuildMI(getMBB, DL, TII.get(ARM::tCMPr))
.addReg(ScratchReg0)
.addReg(ScratchReg1));

// This jump is taken if StackLimit < SP - stack required, i.e. there is
// enough stack and __morestack does not need to be called.
BuildMI(getMBB, DL, TII.get(ARM::tBcc))
.addMBB(postStackMBB)
.addImm(ARMCC::LO)
.addReg(ARM::CPSR);


// Calling __morestack(StackSize, Size of stack arguments).
// __morestack expects the requested stack size in SR0 (r4) and the size of
// the stack arguments in SR1 (r5).

// First argument for __morestack, in Scratch Register #0:
// the amount of stack required.
// NOTE(review): tMOVi8 presumably takes an 8-bit immediate -- confirm the
// values passed here and below always fit.
AddDefaultPred(AddDefaultCC(BuildMI(allocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0))
.addImm(AlignedStackSize));
// Second argument for __morestack, in Scratch Register #1:
// the amount of stack consumed by the function's stack arguments.
AddDefaultPred(AddDefaultCC(BuildMI(allocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
.addImm(AlignToARMConstant(ARMFI->getArgumentStackSize())));

// push {lr} - Save return address of this function.
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tPUSH)))
.addReg(ARM::LR);

// Call __morestack().
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tBL)))
.addExternalSymbol("__morestack");

// Restore the return address of the original function. It is popped into
// SR0 first and then moved to LR.
// pop {SR0}
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tPOP)))
.addReg(ScratchReg0);

// mov lr, SR0
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
.addReg(ScratchReg0));

// Restore SR0 and SR1 in case __morestack() was called.
// __morestack() will skip the postStackMBB block, so the scratch registers
// have to be restored here.
// pop {SR0, SR1}
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tPOP)))
.addReg(ScratchReg0)
.addReg(ScratchReg1);

// Return from this function.
// mov pc, lr
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::tMOVr), ARM::PC)
.addReg(ARM::LR));

// Restore SR0 and SR1 in case __morestack() was not called.
// pop {SR0, SR1}
AddDefaultPred(BuildMI(postStackMBB, DL, TII.get(ARM::tPOP)))
.addReg(ScratchReg0)
.addReg(ScratchReg1);

// Organizing MBB lists
postStackMBB->addSuccessor(&prologueMBB);

// NOTE(review): allocMBB ends by moving LR into PC, yet postStackMBB is
// recorded as its successor -- confirm this edge is intended.
allocMBB->addSuccessor(postStackMBB);

getMBB->addSuccessor(postStackMBB);
getMBB->addSuccessor(allocMBB);

mcrMBB->addSuccessor(getMBB);

prevStackMBB->addSuccessor(mcrMBB);

#ifdef XDEBUG
MF.verify();
#endif
}
2 changes: 2 additions & 0 deletions lib/Target/ARM/ARMFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ class ARMFrameLowering : public TargetFrameLowering {
RegScavenger *RS) const;

// Split-stack prologue adjustment; dispatches on the subtarget to the Thumb
// or the ARM variant.
void adjustForSegmentedStacks(MachineFunction &MF) const;
// Split-stack prologue adjustment for Thumb functions.
void adjustForSegmentedStacksThumb(MachineFunction &MF) const;
// Split-stack prologue adjustment for ARM (non-Thumb) functions.
void adjustForSegmentedStacksARM(MachineFunction &MF) const;

private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
Expand Down
41 changes: 41 additions & 0 deletions test/CodeGen/Thumb/segmented-stacks-dynamic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; RUN: llc < %s -mcpu=generic -mtriple=thumb-linux-android -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-Linux-Android
; RUN: llc < %s -mcpu=generic -mtriple=thumb-linux-android -segmented-stacks -filetype=obj

; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)

; Recursive function with a variable-sized alloca (%l elements); the recursion
; stops once %l reaches 0. The checks below expect the split-stack prologue
; without a `sub` of the frame size before the limit load, and the value 16
; placed in r4 before the call to __morestack.
define i32 @test_basic(i32 %l) {
%mem = alloca i32, i32 %l
call void @dummy_use (i32* %mem, i32 %l)
%terminate = icmp eq i32 %l, 0
br i1 %terminate, label %true, label %false

true:
ret i32 0

false:
%newlen = sub i32 %l, 1
%retvalue = call i32 @test_basic(i32 %newlen)
ret i32 %retvalue

; Thumb-Linux-Android: test_basic:

; Thumb-Linux-Android: push {r4, r5}
; Thumb-Linux-Android-NEXT: mov r5, sp
; Thumb-Linux-Android-NEXT: ldr r4, .LCPI0_0
; Thumb-Linux-Android-NEXT: ldr r4, [r4]
; Thumb-Linux-Android-NEXT: cmp r4, r5
; Thumb-Linux-Android-NEXT: blo .LBB0_2

; Thumb-Linux-Android: mov r4, #16
; Thumb-Linux-Android-NEXT: mov r5, #0
; Thumb-Linux-Android-NEXT: push {lr}
; Thumb-Linux-Android-NEXT: bl __morestack
; Thumb-Linux-Android-NEXT: pop {r4}
; Thumb-Linux-Android-NEXT: mov lr, r4
; Thumb-Linux-Android-NEXT: pop {r4, r5}
; Thumb-Linux-Android-NEXT: mov pc, lr

; Thumb-Linux-Android: pop {r4, r5}

}
145 changes: 145 additions & 0 deletions test/CodeGen/Thumb/segmented-stacks.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
; RUN: llc < %s -mcpu=generic -mtriple=thumb-linux-android -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-Linux-Android

; We used to crash with filetype=obj
; RUN: llc < %s -mcpu=generic -mtriple=thumb-linux-android -segmented-stacks -filetype=obj


; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)

; Small fixed-size frame (alloca of 10 x i32). The checks below expect the
; stack pointer to be compared against the limit directly (no `sub` of the
; frame size) and the value 48 placed in r4 before the call to __morestack.
define void @test_basic() {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void

; Thumb-Linux-Android: test_basic:

; Thumb-Linux-Android: push {r4, r5}
; Thumb-Linux-Android-NEXT: mov r5, sp
; Thumb-Linux-Android-NEXT: ldr r4, .LCPI0_0
; Thumb-Linux-Android-NEXT: ldr r4, [r4]
; Thumb-Linux-Android-NEXT: cmp r4, r5
; Thumb-Linux-Android-NEXT: blo .LBB0_2

; Thumb-Linux-Android: mov r4, #48
; Thumb-Linux-Android-NEXT: mov r5, #0
; Thumb-Linux-Android-NEXT: push {lr}
; Thumb-Linux-Android-NEXT: bl __morestack
; Thumb-Linux-Android-NEXT: pop {r4}
; Thumb-Linux-Android-NEXT: mov lr, r4
; Thumb-Linux-Android-NEXT: pop {r4, r5}
; Thumb-Linux-Android-NEXT: mov pc, lr

; Thumb-Linux-Android: pop {r4, r5}

}

; Function taking a `nest` parameter and using no alloca. The checks below
; expect the same split-stack prologue as for ordinary functions, with the
; value 0 placed in r4 before the call to __morestack.
define i32 @test_nested(i32 * nest %closure, i32 %other) {
%addend = load i32 * %closure
%result = add i32 %other, %addend
ret i32 %result

; Thumb-Linux-Android: test_nested:

; Thumb-Linux-Android: push {r4, r5}
; Thumb-Linux-Android-NEXT: mov r5, sp
; Thumb-Linux-Android-NEXT: ldr r4, .LCPI1_0
; Thumb-Linux-Android-NEXT: ldr r4, [r4]
; Thumb-Linux-Android-NEXT: cmp r4, r5
; Thumb-Linux-Android-NEXT: blo .LBB1_2

; Thumb-Linux-Android: mov r4, #0
; Thumb-Linux-Android-NEXT: mov r5, #0
; Thumb-Linux-Android-NEXT: push {lr}
; Thumb-Linux-Android-NEXT: bl __morestack
; Thumb-Linux-Android-NEXT: pop {r4}
; Thumb-Linux-Android-NEXT: mov lr, r4
; Thumb-Linux-Android-NEXT: pop {r4, r5}
; Thumb-Linux-Android-NEXT: mov pc, lr

; Thumb-Linux-Android: pop {r4, r5}

}

; Large fixed-size frame (alloca of 10000 x i32). The checks below expect the
; frame size (40192) to be subtracted from the copied stack pointer before the
; comparison with the limit, and the same value placed in r4 before the call
; to __morestack.
; NOTE(review): 40192 presumably does not fit an 8-bit Thumb immediate --
; confirm the expected `sub`/`mov` forms are actually encodable.
define void @test_large() {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void

; Thumb-Linux-Android: test_large:

; Thumb-Linux-Android: push {r4, r5}
; Thumb-Linux-Android-NEXT: mov r5, sp
; Thumb-Linux-Android-NEXT: sub r5, #40192
; Thumb-Linux-Android-NEXT: ldr r4, .LCPI2_2
; Thumb-Linux-Android-NEXT: ldr r4, [r4]
; Thumb-Linux-Android-NEXT: cmp r4, r5
; Thumb-Linux-Android-NEXT: blo .LBB2_2

; Thumb-Linux-Android: mov r4, #40192
; Thumb-Linux-Android-NEXT: mov r5, #0
; Thumb-Linux-Android-NEXT: push {lr}
; Thumb-Linux-Android-NEXT: bl __morestack
; Thumb-Linux-Android-NEXT: pop {r4}
; Thumb-Linux-Android-NEXT: mov lr, r4
; Thumb-Linux-Android-NEXT: pop {r4, r5}
; Thumb-Linux-Android-NEXT: mov pc, lr

; Thumb-Linux-Android: pop {r4, r5}

}

; Same small frame as @test_basic, but declared with the fastcc calling
; convention. The checks below expect the identical prologue shape: no `sub`
; of the frame size, and the value 48 placed in r4 before calling __morestack.
define fastcc void @test_fastcc() {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void

; Thumb-Linux-Android: test_fastcc:

; Thumb-Linux-Android: push {r4, r5}
; Thumb-Linux-Android-NEXT: mov r5, sp
; Thumb-Linux-Android-NEXT: ldr r4, .LCPI3_0
; Thumb-Linux-Android-NEXT: ldr r4, [r4]
; Thumb-Linux-Android-NEXT: cmp r4, r5
; Thumb-Linux-Android-NEXT: blo .LBB3_2

; Thumb-Linux-Android: mov r4, #48
; Thumb-Linux-Android-NEXT: mov r5, #0
; Thumb-Linux-Android-NEXT: push {lr}
; Thumb-Linux-Android-NEXT: bl __morestack
; Thumb-Linux-Android-NEXT: pop {r4}
; Thumb-Linux-Android-NEXT: mov lr, r4
; Thumb-Linux-Android-NEXT: pop {r4, r5}
; Thumb-Linux-Android-NEXT: mov pc, lr

; Thumb-Linux-Android: pop {r4, r5}

}

; Same large frame as @test_large, but declared with the fastcc calling
; convention. The checks below expect the frame size (40192) to be subtracted
; from the copied stack pointer before the comparison with the limit, and the
; same value placed in r4 before the call to __morestack.
; NOTE(review): 40192 presumably does not fit an 8-bit Thumb immediate --
; confirm the expected `sub`/`mov` forms are actually encodable.
define fastcc void @test_fastcc_large() {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void

; Thumb-Linux-Android: test_fastcc_large:

; Thumb-Linux-Android: push {r4, r5}
; Thumb-Linux-Android-NEXT: mov r5, sp
; Thumb-Linux-Android-NEXT: sub r5, #40192
; Thumb-Linux-Android-NEXT: ldr r4, .LCPI4_2
; Thumb-Linux-Android-NEXT: ldr r4, [r4]
; Thumb-Linux-Android-NEXT: cmp r4, r5
; Thumb-Linux-Android-NEXT: blo .LBB4_2

; Thumb-Linux-Android: mov r4, #40192
; Thumb-Linux-Android-NEXT: mov r5, #0
; Thumb-Linux-Android-NEXT: push {lr}
; Thumb-Linux-Android-NEXT: bl __morestack
; Thumb-Linux-Android-NEXT: pop {r4}
; Thumb-Linux-Android-NEXT: mov lr, r4
; Thumb-Linux-Android-NEXT: pop {r4, r5}
; Thumb-Linux-Android-NEXT: mov pc, lr

; Thumb-Linux-Android: pop {r4, r5}

}

0 comments on commit fd8a627

Please sign in to comment.