Skip to content

Commit

Permalink
[compiler-rt] Allow 3 simultaneous interceptors on Linux
Browse files Browse the repository at this point in the history
Rework Linux (and *BSD) interceptors to allow for up to 3 (2 for *BSD)
simultaneous interceptors. See code comments for details.

The main motivation is to support new sampling sanitizers (in the spirit
of GWP-ASan) that have to intercept a few functions. Unfortunately, the
reality is that user interceptors already exist in the wild.

To support foreign user interceptors, foreign dynamic analysis
interceptors, and compiler-rt interceptors all at the same time,
including any combination of them, this change enables up to 3
interceptors on Linux (2 on *BSD).

Reviewed By: dvyukov, MaskRay, vitalybuka

Differential Revision: https://reviews.llvm.org/D151085
  • Loading branch information
melver committed Jun 7, 2023
1 parent 2b7ded2 commit 74b0ac5
Show file tree
Hide file tree
Showing 7 changed files with 330 additions and 40 deletions.
101 changes: 81 additions & 20 deletions compiler-rt/lib/interception/interception.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#ifndef INTERCEPTION_H
#define INTERCEPTION_H

#include "sanitizer_common/sanitizer_asm.h"
#include "sanitizer_common/sanitizer_internal_defs.h"

#if !SANITIZER_LINUX && !SANITIZER_FREEBSD && !SANITIZER_APPLE && \
Expand Down Expand Up @@ -67,24 +68,50 @@ typedef __sanitizer::OFF64_T OFF64_T;
// for more details). To intercept such functions you need to use the
// INTERCEPTOR_WITH_SUFFIX(...) macro.

// How it works:
// To replace system functions on Linux we just need to declare functions
// with same names in our library and then obtain the real function pointers
// How it works on Linux
// ---------------------
//
// To replace system functions on Linux we just need to declare functions with
// the same names in our library and then obtain the real function pointers
// using dlsym().
// There is one complication. A user may also intercept some of the functions
// we intercept. To resolve this we declare our interceptors with __interceptor_
// prefix, and then make actual interceptors weak aliases to __interceptor_
// functions.
//
// This is not so on Mac OS, where the two-level namespace makes
// our replacement functions invisible to other libraries. This may be overcomed
// using the DYLD_FORCE_FLAT_NAMESPACE, but some errors loading the shared
// libraries in Chromium were noticed when doing so.
// There is one complication: a user may also intercept some of the functions we
// intercept. To allow for up to 3 interceptors (including ours) of a given
// function "func", the interceptor implementation is in ___interceptor_func,
// which is aliased by a weak function __interceptor_func, which in turn is
// aliased (via a trampoline) by weak wrapper function "func".
//
// Most user interceptors should define a foreign interceptor as follows:
//
// - provide a non-weak function "func" that performs interception;
// - if __interceptor_func exists, call it to perform the real functionality;
// - if it does not exist, figure out the real function and call it instead.
//
// In rare cases, a foreign interceptor (of another dynamic analysis runtime)
// may be defined as follows:
//
// - provide a non-weak function __interceptor_func that performs interception;
// - if ___interceptor_func exists, call it to perform the real functionality;
// - if it does not exist, figure out the real function and call it instead;
// - provide a weak function "func" that is an alias to __interceptor_func.
//
// With this protocol, sanitizer interceptors, foreign user interceptors, and
// foreign interceptors of other dynamic analysis runtimes, or any combination
// thereof, may co-exist simultaneously.
//
// How it works on Mac OS
// ----------------------
//
// This is not so on Mac OS, where the two-level namespace makes our replacement
// functions invisible to other libraries. This may be overcome using
// DYLD_FORCE_FLAT_NAMESPACE, but some errors loading the shared libraries in
// Chromium were noticed when doing so.
//
// Instead we create a dylib containing a __DATA,__interpose section that
// associates library functions with their wrappers. When this dylib is
// preloaded before an executable using DYLD_INSERT_LIBRARIES, it routes all
// the calls to interposed functions done through stubs to the wrapper
// functions.
// preloaded before an executable using DYLD_INSERT_LIBRARIES, it routes all the
// calls to interposed functions done through stubs to the wrapper functions.
//
// As it's decided at compile time which functions are to be intercepted on Mac,
// INTERCEPT_FUNCTION() is effectively a no-op on this system.

Expand Down Expand Up @@ -131,20 +158,54 @@ const interpose_substitution substitution_##func_name[] \
# define DECLARE_WRAPPER_WINAPI(ret_type, func, ...) \
extern "C" __declspec(dllimport) ret_type __stdcall func(__VA_ARGS__);
#elif !SANITIZER_FUCHSIA // LINUX, FREEBSD, NETBSD, SOLARIS
# define WRAP(x) __interceptor_ ## x
# define TRAMPOLINE(x) WRAP(x)
# define WRAP(x) ___interceptor_ ## x
# define TRAMPOLINE(x) __interceptor_trampoline_ ## x
# define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default")))
# if SANITIZER_FREEBSD || SANITIZER_NETBSD
// FreeBSD's dynamic linker (incompliantly) gives non-weak symbols higher
// priority than weak ones so weak aliases won't work for indirect calls
// in position-independent (-fPIC / -fPIE) mode.
# define OVERRIDE_ATTRIBUTE
# define __ASM_WEAK_WRAPPER(func)
# else // SANITIZER_FREEBSD || SANITIZER_NETBSD
# define OVERRIDE_ATTRIBUTE __attribute__((weak))
# define __ASM_WEAK_WRAPPER(func) ".weak " #func "\n"
# endif // SANITIZER_FREEBSD || SANITIZER_NETBSD
//
// Note: Weak aliases of weak aliases do not work, therefore we need to set up a
// trampoline function. The function "func" is a weak alias to the trampoline
// (so that we may check if "func" was overridden), which calls the weak
// function __interceptor_func, which in turn aliases the actual interceptor
// implementation ___interceptor_func:
//
// [wrapper "func": weak] --(alias)--> [TRAMPOLINE(func)]
// |
// +--------(tail call)-------+
// |
// v
// [__interceptor_func: weak] --(alias)--> [WRAP(func)]
//
// We use inline assembly to define most of this, because not all compilers
// support functions with the "naked" attribute with every architecture.
//
# define DECLARE_WRAPPER(ret_type, func, ...) \
extern "C" ret_type func(__VA_ARGS__) INTERCEPTOR_ATTRIBUTE \
OVERRIDE_ATTRIBUTE ALIAS(WRAP(func));
extern "C" ret_type func(__VA_ARGS__); \
extern "C" ret_type TRAMPOLINE(func)(__VA_ARGS__); \
extern "C" ret_type __interceptor_##func(__VA_ARGS__) \
INTERCEPTOR_ATTRIBUTE __attribute__((weak)) ALIAS(WRAP(func)); \
asm( \
".text\n" \
__ASM_WEAK_WRAPPER(func) \
".set " #func ", " SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n" \
".globl " SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n" \
".type " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", @function\n" \
SANITIZER_STRINGIFY(TRAMPOLINE(func)) ":\n" \
SANITIZER_STRINGIFY(CFI_STARTPROC) "\n" \
ASM_INL_PPC64_GLOBALENTRY(SANITIZER_STRINGIFY(TRAMPOLINE(func))) \
SANITIZER_STRINGIFY(ASM_TAIL_CALL) " __interceptor_" \
SANITIZER_STRINGIFY(ASM_PREEMPTIBLE_SYM(func)) "\n" \
SANITIZER_STRINGIFY(CFI_ENDPROC) "\n" \
".size " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", " \
".-" SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n" \
);
#endif

#if SANITIZER_FUCHSIA
Expand Down
5 changes: 5 additions & 0 deletions compiler-rt/lib/interception/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ filter_available_targets(INTERCEPTION_UNITTEST_SUPPORTED_ARCH x86_64 i386 mips64

set(INTERCEPTION_UNITTESTS
interception_linux_test.cpp
interception_linux_foreign_test.cpp
interception_test_main.cpp
interception_win_test.cpp
)
Expand All @@ -19,6 +20,10 @@ set(INTERCEPTION_TEST_CFLAGS_COMMON
-I${COMPILER_RT_SOURCE_DIR}/lib/interception
-DSANITIZER_COMMON_NO_REDEFINE_BUILTINS
-fno-rtti
-fno-builtin-isdigit
-fno-builtin-isalpha
-fno-builtin-isalnum
-fno-builtin-islower
-O2
-Werror=sign-compare)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
//===-- interception_linux_foreign_test.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
//
// Tests that foreign interceptors work.
//
//===----------------------------------------------------------------------===//

// Do not declare functions in ctype.h.
#define __NO_CTYPE

#include "gtest/gtest.h"
#include "sanitizer_common/sanitizer_internal_defs.h"

#if SANITIZER_LINUX

// Hand-written declarations of the symbols this file calls but does not
// define. ctype.h declarations are suppressed by __NO_CTYPE above, which is
// presumably why isalnum is declared manually here.
extern "C" int isalnum(int d);
// Called by the direct foreign isalpha interceptor below.
extern "C" int __interceptor_isalpha(int d);
extern "C" int ___interceptor_isalnum(int d); // the sanitizer interceptor
extern "C" int ___interceptor_islower(int d); // the sanitizer interceptor

// Call-trace counters shared with interception_linux_test.cpp, which provides
// the definitions; each interceptor in a chain appends its own decimal digit.
namespace __interception {
extern int isalpha_called;
extern int isalnum_called;
extern int islower_called;
} // namespace __interception
using namespace __interception;

// Foreign interceptor following the direct ("normal") protocol that other
// interceptors are expected to use: it defines "isalpha" itself, records the
// call, and then defers to __interceptor_isalpha.
extern "C" int isalpha(int d) {
  // Shift the decimal call trace and append digit 1. The arithmetic is
  // non-commutative on purpose, so the final value encodes the call order.
  isalpha_called *= 10;
  isalpha_called += 1;
  return __interceptor_isalpha(d);
}

// Foreign interceptor following the indirect protocol: it occupies the
// __interceptor_isalnum slot rather than "isalnum" itself. This pattern should
// only be used to co-exist with direct foreign interceptors and sanitizer
// interceptors.
extern "C" int __interceptor_isalnum(int d) {
  // Append digit 1 to the decimal call trace before forwarding.
  isalnum_called *= 10;
  isalnum_called += 1;
  return ___interceptor_isalnum(d);
}

// Middle link of the three-interceptor islower chain: appends digit 2 to the
// call trace and forwards to the sanitizer interceptor ___interceptor_islower.
extern "C" int __interceptor_islower(int d) {
  islower_called *= 10;
  islower_called += 2;
  return ___interceptor_islower(d);
}

// Outermost link of the three-interceptor islower chain: appends digit 1 to
// the call trace and forwards to __interceptor_islower.
extern "C" int islower(int d) {
  islower_called *= 10;
  islower_called += 1;
  return __interceptor_islower(d);
}

namespace __interception {

// Calls the directly overridden isalpha and verifies, via the decimal call
// trace, which interceptors ran and in what order.
TEST(ForeignInterception, ForeignOverrideDirect) {
isalpha_called = 0;
EXPECT_NE(0, isalpha('a'));
// Trace 13: the foreign isalpha in this file appended 1, then the interceptor
// it forwarded to (in interception_linux_test.cpp) appended 3.
EXPECT_EQ(13, isalpha_called);
// Reset so the second call is checked independently of the first.
isalpha_called = 0;
EXPECT_EQ(0, isalpha('_'));
// The same chain is expected for a non-alphabetic input.
EXPECT_EQ(13, isalpha_called);
}

// Calls isalnum, which this file does not define itself; only
// __interceptor_isalnum is overridden here. Verifies via the decimal call
// trace that the foreign interceptor still runs before the sanitizer one.
TEST(ForeignInterception, ForeignOverrideIndirect) {
isalnum_called = 0;
EXPECT_NE(0, isalnum('a'));
// Trace 13: foreign __interceptor_isalnum appended 1, then
// ___interceptor_isalnum appended 3.
EXPECT_EQ(13, isalnum_called);
// Reset so the second call is checked independently of the first.
isalnum_called = 0;
EXPECT_EQ(0, isalnum('_'));
// The same chain is expected for a non-alphanumeric input.
EXPECT_EQ(13, isalnum_called);
}

// Exercises all three interceptor layers for islower at once and verifies
// their order through the decimal call trace.
TEST(ForeignInterception, ForeignOverrideThree) {
islower_called = 0;
EXPECT_NE(0, islower('a'));
// Trace 123: islower appended 1, __interceptor_islower appended 2, and
// ___interceptor_islower appended 3.
EXPECT_EQ(123, islower_called);
// Reset so the second call is checked independently of the first.
islower_called = 0;
EXPECT_EQ(0, islower('_'));
// The same chain is expected for an input that is not a lowercase letter.
EXPECT_EQ(123, islower_called);
}

} // namespace __interception

#endif // SANITIZER_LINUX
110 changes: 96 additions & 14 deletions compiler-rt/lib/interception/tests/interception_linux_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,57 +11,139 @@
//
//===----------------------------------------------------------------------===//

// Do not declare isdigit in ctype.h.
// Do not declare functions in ctype.h.
#define __NO_CTYPE

#include "interception/interception.h"

#include <stdlib.h>

#include "gtest/gtest.h"

// Too slow for debug build
#if !SANITIZER_DEBUG
#if SANITIZER_LINUX

static int InterceptorFunctionCalled;
static int isdigit_called;
// Call-trace counters for the foreign-override tests. Each interceptor in a
// chain multiplies the counter by 10 and adds its own digit, so the final
// value encodes the call order. They are non-static and live in
// __interception so that interception_linux_foreign_test.cpp can reference
// them through its extern declarations.
namespace __interception {
int isalpha_called;
int isalnum_called;
int islower_called;
} // namespace __interception
using namespace __interception;

// Declarations for the functions accessed through REAL(...) in the tests
// below. NOTE(review): presumably DECLARE_REAL declares the storage behind
// REAL(func) -- confirm against interception.h.
DECLARE_REAL(int, isdigit, int);
DECLARE_REAL(int, isalpha, int);
DECLARE_REAL(int, isalnum, int);
DECLARE_REAL(int, islower, int);

// malloc interceptor that forwards to calloc(1, s).
INTERCEPTOR(void *, malloc, SIZE_T s) { return calloc(1, s); }
// Interceptor for a name the InterceptFunction test expects not to resolve;
// it traps if it is ever actually called.
INTERCEPTOR(void, dummy_doesnt_exist__, ) { __builtin_trap(); }

INTERCEPTOR(int, isdigit, int d) {
++InterceptorFunctionCalled;
++isdigit_called;
return d >= '0' && d <= '9';
}

// Sanitizer-side isalpha interceptor: records its invocation and classifies
// ASCII letters itself instead of calling the real function.
INTERCEPTOR(int, isalpha, int d) {
  // Shift the decimal call trace and append digit 3. The arithmetic is
  // non-commutative on purpose, so the final value encodes the call order.
  isalpha_called *= 10;
  isalpha_called += 3;
  const bool is_lowercase = d >= 'a' && d <= 'z';
  const bool is_uppercase = d >= 'A' && d <= 'Z';
  return is_lowercase || is_uppercase;
}

// Sanitizer-side isalnum interceptor: records its invocation and answers by
// combining the isalpha and isdigit interceptors.
INTERCEPTOR(int, isalnum, int d) {
  // Append digit 3 to the decimal call trace.
  isalnum_called *= 10;
  isalnum_called += 3;
  // Both callees update their own counters, so __interceptor_isdigit must only
  // be consulted when the isalpha check fails.
  if (__interceptor_isalpha(d))
    return 1;
  return __interceptor_isdigit(d) != 0;
}

// Sanitizer-side islower interceptor: records its invocation and classifies
// ASCII lowercase letters itself instead of calling the real function.
INTERCEPTOR(int, islower, int d) {
  // Append digit 3 to the decimal call trace.
  islower_called *= 10;
  islower_called += 3;
  const bool is_lowercase = 'a' <= d && d <= 'z';
  return is_lowercase;
}

namespace __interception {

TEST(Interception, InterceptFunction) {
uptr malloc_address = 0;
EXPECT_TRUE(InterceptFunction("malloc", &malloc_address, 0, 0));
EXPECT_TRUE(InterceptFunction("malloc", &malloc_address, (uptr)&malloc,
(uptr)&__interceptor_trampoline_malloc));
EXPECT_NE(0U, malloc_address);
EXPECT_FALSE(InterceptFunction("malloc", &malloc_address, 0, 1));
EXPECT_FALSE(InterceptFunction("malloc", &malloc_address, (uptr)&calloc,
(uptr)&__interceptor_trampoline_malloc));

uptr dummy_address = 0;
EXPECT_FALSE(InterceptFunction("dummy_doesnt_exist__", &dummy_address, 0, 0));
EXPECT_FALSE(InterceptFunction(
"dummy_doesnt_exist__", &dummy_address, (uptr)&dummy_doesnt_exist__,
(uptr)&__interceptor_trampoline_dummy_doesnt_exist__));
EXPECT_EQ(0U, dummy_address);
}

TEST(Interception, Basic) {
EXPECT_TRUE(INTERCEPT_FUNCTION(isdigit));

// After interception, the counter should be incremented.
InterceptorFunctionCalled = 0;
isdigit_called = 0;
EXPECT_NE(0, isdigit('1'));
EXPECT_EQ(1, InterceptorFunctionCalled);
EXPECT_EQ(1, isdigit_called);
EXPECT_EQ(0, isdigit('a'));
EXPECT_EQ(2, InterceptorFunctionCalled);
EXPECT_EQ(2, isdigit_called);

// Calling the REAL function should not affect the counter.
InterceptorFunctionCalled = 0;
isdigit_called = 0;
EXPECT_NE(0, REAL(isdigit)('1'));
EXPECT_EQ(0, REAL(isdigit)('a'));
EXPECT_EQ(0, InterceptorFunctionCalled);
EXPECT_EQ(0, isdigit_called);
}

// Covers a function whose wrapper "isalpha" is overridden by a foreign
// non-weak definition (see interception_linux_foreign_test.cpp).
TEST(Interception, ForeignOverrideDirect) {
// Actual interceptor is overridden.
EXPECT_FALSE(INTERCEPT_FUNCTION(isalpha));

isalpha_called = 0;
EXPECT_NE(0, isalpha('a'));
// Trace 13: the foreign isalpha appended 1, then this file's interceptor
// appended 3.
EXPECT_EQ(13, isalpha_called);
isalpha_called = 0;
EXPECT_EQ(0, isalpha('_'));
EXPECT_EQ(13, isalpha_called);

// Calling through REAL(isalpha) must leave the call trace untouched, i.e.
// neither interceptor runs.
isalpha_called = 0;
EXPECT_NE(0, REAL(isalpha)('a'));
EXPECT_EQ(0, REAL(isalpha)('_'));
EXPECT_EQ(0, isalpha_called);
}

// Covers a function where only __interceptor_isalnum is overridden by a
// foreign definition (see interception_linux_foreign_test.cpp); the wrapper
// "isalnum" itself is left alone.
TEST(Interception, ForeignOverrideIndirect) {
// Actual interceptor is _not_ overridden.
EXPECT_TRUE(INTERCEPT_FUNCTION(isalnum));

isalnum_called = 0;
EXPECT_NE(0, isalnum('a'));
// Trace 13: the foreign __interceptor_isalnum appended 1, then this file's
// interceptor appended 3.
EXPECT_EQ(13, isalnum_called);
isalnum_called = 0;
EXPECT_EQ(0, isalnum('_'));
EXPECT_EQ(13, isalnum_called);

// Calling through REAL(isalnum) must leave the call trace untouched, i.e.
// neither interceptor runs.
isalnum_called = 0;
EXPECT_NE(0, REAL(isalnum)('a'));
EXPECT_EQ(0, REAL(isalnum)('_'));
EXPECT_EQ(0, isalnum_called);
}

// Covers a function with three stacked interceptors: foreign "islower",
// foreign __interceptor_islower (both in interception_linux_foreign_test.cpp),
// and this file's interceptor.
TEST(Interception, ForeignOverrideThree) {
// Actual interceptor is overridden.
EXPECT_FALSE(INTERCEPT_FUNCTION(islower));

islower_called = 0;
EXPECT_NE(0, islower('a'));
// Trace 123: each layer appended its digit in call order (1, then 2, then 3).
EXPECT_EQ(123, islower_called);
islower_called = 0;
EXPECT_EQ(0, islower('A'));
EXPECT_EQ(123, islower_called);

// Calling through REAL(islower) must leave the call trace untouched, i.e.
// none of the three interceptors runs.
islower_called = 0;
EXPECT_NE(0, REAL(islower)('a'));
EXPECT_EQ(0, REAL(islower)('A'));
EXPECT_EQ(0, islower_called);
}

} // namespace __interception

#endif // SANITIZER_LINUX
#endif // #if !SANITIZER_DEBUG
Loading

0 comments on commit 74b0ac5

Please sign in to comment.