diff --git a/base/fastmath.jl b/base/fastmath.jl
index 36d975506e48c..b02ed7d84a1af 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -23,7 +23,7 @@ module FastMath
 
 export @fastmath
 
-import Core.Intrinsics: powi_llvm, sqrt_llvm_fast, neg_float_fast,
+import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast,
     add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast,
     eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast
 
@@ -243,8 +243,8 @@ end
 
 # builtins
 
-pow_fast{T<:FloatTypes}(x::T, y::Integer) = pow_fast(x, Int32(y))
-pow_fast{T<:FloatTypes}(x::T, y::Int32) = Base.powi_llvm(x, y)
+pow_fast(x::Float32, y::Integer) = ccall("llvm.powi.f32", llvmcall, Float32, (Float32, Int32), x, y)
+pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64", llvmcall, Float64, (Float64, Int32), x, y)
 
 # TODO: Change sqrt_llvm intrinsic to avoid nan checking; add nan
 # checking to sqrt in math.jl; remove sqrt_llvm_fast intrinsic
diff --git a/base/inference.jl b/base/inference.jl
index db597a6023510..a65cb444ca357 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -447,7 +447,6 @@ add_tfunc(floor_llvm, 1, 1, math_tfunc)
 add_tfunc(trunc_llvm, 1, 1, math_tfunc)
 add_tfunc(rint_llvm, 1, 1, math_tfunc)
 add_tfunc(sqrt_llvm, 1, 1, math_tfunc)
-add_tfunc(powi_llvm, 2, 2, math_tfunc)
 add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc)
     ## same-type comparisons ##
 cmp_tfunc(x::ANY, y::ANY) = Bool
diff --git a/base/math.jl b/base/math.jl
index 523f7fec89299..21e646088e7f5 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -32,7 +32,7 @@ using Base: sign_mask, exponent_mask, exponent_one, exponent_bias,
             exponent_half, exponent_max, exponent_raw_max, fpinttype,
             significand_mask, significand_bits, exponent_bits
 
-using Core.Intrinsics: sqrt_llvm, powi_llvm
+using Core.Intrinsics: sqrt_llvm
 
 # non-type specific math functions
 
@@ -308,6 +308,8 @@ exp10(x::Float32) = 10.0f0^x
 exp10(x::Integer) = exp10(float(x))
 
 # utility for converting NaN return to DomainError
+# the branch in nan_dom_err prevents its callers from inlining, so be sure to force it
+# until the heuristics can be improved
 @inline nan_dom_err(f, x) = isnan(f) & !isnan(x) ? throw(DomainError()) : f
 
 # functions that return NaN on non-NaN argument for domain error
@@ -414,9 +416,9 @@ log1p(x)
 for f in (:sin, :cos, :tan, :asin, :acos, :acosh, :atanh, :log, :log2, :log10,
           :lgamma, :log1p)
     @eval begin
-        ($f)(x::Float64) = nan_dom_err(ccall(($(string(f)),libm), Float64, (Float64,), x), x)
-        ($f)(x::Float32) = nan_dom_err(ccall(($(string(f,"f")),libm), Float32, (Float32,), x), x)
-        ($f)(x::Real) = ($f)(float(x))
+        @inline ($f)(x::Float64) = nan_dom_err(ccall(($(string(f)),libm), Float64, (Float64,), x), x)
+        @inline ($f)(x::Float32) = nan_dom_err(ccall(($(string(f, "f")), libm), Float32, (Float32,), x), x)
+        @inline ($f)(x::Real) = ($f)(float(x))
     end
 end
 
@@ -677,14 +679,11 @@ function modf(x::Float64)
     f, _modf_temp[]
 end
 
-^(x::Float64, y::Float64) = nan_dom_err(ccall((:pow,libm),  Float64, (Float64,Float64), x, y), x+y)
-^(x::Float32, y::Float32) = nan_dom_err(ccall((:powf,libm), Float32, (Float32,Float32), x, y), x+y)
-
-^(x::Float64, y::Integer) = x^Int32(y)
-^(x::Float64, y::Int32) = powi_llvm(x, y)
-^(x::Float32, y::Integer) = x^Int32(y)
-^(x::Float32, y::Int32) = powi_llvm(x, y)
-^(x::Float16, y::Integer) = Float16(Float32(x)^y)
+@inline ^(x::Float64, y::Float64) = nan_dom_err(ccall("llvm.pow.f64", llvmcall, Float64, (Float64, Float64), x, y), x + y)
+@inline ^(x::Float32, y::Float32) = nan_dom_err(ccall("llvm.pow.f32", llvmcall, Float32, (Float32, Float32), x, y), x + y)
+@inline ^(x::Float64, y::Integer) = x ^ Float64(y)
+@inline ^(x::Float32, y::Integer) = x ^ Float32(y)
+@inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ Float32(y))
 
 function angle_restrict_symm(theta)
     const P1 = 4 * 7.8539812564849853515625e-01
diff --git a/base/replutil.jl b/base/replutil.jl
index d15f24ae58b9e..84cf032522974 100644
--- a/base/replutil.jl
+++ b/base/replutil.jl
@@ -219,28 +219,29 @@ showerror(io::IO, ex::InitError) = showerror(io, ex, [])
 function showerror(io::IO, ex::DomainError, bt; backtrace=true)
     print(io, "DomainError:")
     for b in bt
-        code = StackTraces.lookup(b)[1]
-        if !code.from_c
-            if code.func == :nan_dom_err
+        for code in StackTraces.lookup(b)
+            if code.from_c
+                continue
+            elseif code.func === :nan_dom_err
                 continue
             elseif code.func in (:log, :log2, :log10, :sqrt) # TODO add :besselj, :besseli, :bessely, :besselk
                 print(io, "\n$(code.func) will only return a complex result if called ",
                     "with a complex argument. Try $(string(code.func))(complex(x)).")
-            elseif (code.func == :^ && code.file == Symbol("intfuncs.jl")) ||
-                    code.func == :power_by_squaring #3024
+            elseif (code.func === :^ &&
+                    (code.file === Symbol("intfuncs.jl") || code.file === Symbol(joinpath(".", "intfuncs.jl")))) ||
+                   code.func === :power_by_squaring #3024
                 print(io, "\nCannot raise an integer x to a negative power -n. ",
                     "\nMake x a float by adding a zero decimal (e.g. 2.0^-n instead ",
                     "of 2^-n), or write 1/x^n, float(x)^-n, or (x//1)^-n.")
-            elseif code.func == :^ &&
-                    (code.file == Symbol("promotion.jl") || code.file == Symbol("math.jl") ||
-                    code.file == Symbol(joinpath(".","promotion.jl")) ||
-                    code.file == Symbol(joinpath(".","math.jl")))
+            elseif code.func === :^ &&
+                    (code.file === Symbol("math.jl") || code.file === Symbol(joinpath(".", "math.jl")))
                 print(io, "\nExponentiation yielding a complex result requires a complex ",
                     "argument.\nReplace x^y with (x+0im)^y, Complex(x)^y, or similar.")
             end
-            break
+            @goto showbacktrace
         end
     end
+    @label showbacktrace
     backtrace && show_backtrace(io, bt)
     nothing
 end
diff --git a/base/special/exp.jl b/base/special/exp.jl
index 9d7012a98034f..b90e7987eccf5 100644
--- a/base/special/exp.jl
+++ b/base/special/exp.jl
@@ -65,6 +65,10 @@ exp_small_thres(::Type{Float32}) = 2.0f0^-13
 Compute the natural base exponential of `x`, in other words ``e^x``.
 """
 function exp{T<:Union{Float32,Float64}}(x::T)
+    ^ = Base.FastMath.pow_fast
+    # we don't need the accurate power function here since we only compute 2^c,
+    # where c is a constant
+
     xa = reinterpret(Unsigned, x) & ~sign_mask(T)
     xsb = signbit(x)
 
@@ -114,13 +118,13 @@ function exp{T<:Union{Float32,Float64}}(x::T)
         # scale back
         if k > -significand_bits(T)
             # multiply by 2.0 first to prevent overflow, which helps extends the range
-            k == exponent_max(T) && return y*T(2.0)*T(2.0)^(exponent_max(T) - 1)
+            k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
             twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T))
             return y*twopk
         else
             # add significand_bits(T) + 1 to lift the range outside the subnormals
             twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T))
-            return y*twopk*T(2.0)^(-significand_bits(T) - 1)
+            return y * twopk * T(2.0)^(-significand_bits(T) - 1)
         end
     elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres
         # Taylor approximation for small x
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 76d8bdd511f38..3545f023aaf1b 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -395,10 +395,6 @@ static Function *jldlsym_func;
 static Function *jlnewbits_func;
 static Function *jltypeassert_func;
 static Function *jldepwarnpi_func;
-#if JL_LLVM_VERSION < 30600
-static Function *jlpow_func;
-static Function *jlpowf_func;
-#endif
 //static Function *jlgetnthfield_func;
 static Function *jlgetnthfieldchecked_func;
 //static Function *jlsetnthfield_func;
@@ -5985,25 +5981,6 @@ static void init_julia_llvm_env(Module *m)
                          "jl_gc_diff_total_bytes", m);
     add_named_global(diff_gc_total_bytes_func, *jl_gc_diff_total_bytes);
 
-#if JL_LLVM_VERSION < 30600
-    Type *powf_type[2] = { T_float32, T_float32 };
-    jlpowf_func = Function::Create(FunctionType::get(T_float32, powf_type, false),
-                                   Function::ExternalLinkage,
-                                   "powf", m);
-    add_named_global(jlpowf_func, &powf, false);
-
-    Type *pow_type[2] = { T_float64, T_float64 };
-    jlpow_func = Function::Create(FunctionType::get(T_float64, pow_type, false),
-                                  Function::ExternalLinkage,
-                                  "pow", m);
-    add_named_global(jlpow_func,
-#ifdef _COMPILER_MICROSOFT_
-        static_cast<double (*)(double, double)>(&pow),
-#else
-        &pow,
-#endif
-        false);
-#endif
     std::vector<Type*> array_owner_args(0);
     array_owner_args.push_back(T_pjlvalue);
     jlarray_data_owner_func =
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 27cc70985c070..b694e917a7256 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -71,7 +71,6 @@ static void jl_init_intrinsic_functions_codegen(Module *m)
     float_func[rint_llvm] = true;
     float_func[sqrt_llvm] = true;
     float_func[sqrt_llvm_fast] = true;
-    float_func[powi_llvm] = true;
 }
 
 extern "C"
@@ -915,33 +914,6 @@ static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs,
         return mark_julia_type(ans, false, x.typ, ctx);
     }
 
-    case powi_llvm: {
-        const jl_cgval_t &x = argv[0];
-        const jl_cgval_t &y = argv[1];
-        if (!jl_is_bitstype(x.typ) || !jl_is_bitstype(y.typ) || jl_datatype_size(y.typ) != 4)
-            return emit_runtime_call(f, argv, nargs, ctx);
-        Type *xt = FLOATT(bitstype_to_llvm(x.typ));
-        Type *yt = T_int32;
-        if (!xt)
-            return emit_runtime_call(f, argv, nargs, ctx);
-
-        Value *xv = emit_unbox(xt, x, x.typ);
-        Value *yv = emit_unbox(yt, y, y.typ);
-#if JL_LLVM_VERSION >= 30600
-        Value *powi = Intrinsic::getDeclaration(jl_Module, Intrinsic::powi, makeArrayRef(xt));
-#if JL_LLVM_VERSION >= 30700
-        Value *ans = builder.CreateCall(powi, {xv, yv});
-#else
-        Value *ans = builder.CreateCall2(powi, xv, yv);
-#endif
-#else
-        // issue #6506
-        Value *ans = builder.CreateCall2(prepare_call(xt == T_float64 ? jlpow_func : jlpowf_func),
-                xv, builder.CreateSIToFP(yv, xt));
-#endif
-        return mark_julia_type(ans, false, x.typ, ctx);
-    }
-
     default: {
         assert(nargs >= 1 && "invalid nargs for intrinsic call");
         const jl_cgval_t &xinfo = argv[0];
diff --git a/src/intrinsics.h b/src/intrinsics.h
index 479b048b940a5..db22de2fa5a5c 100644
--- a/src/intrinsics.h
+++ b/src/intrinsics.h
@@ -91,7 +91,6 @@
     ADD_I(trunc_llvm, 1) \
     ADD_I(rint_llvm, 1) \
     ADD_I(sqrt_llvm, 1) \
-    ADD_I(powi_llvm, 2) \
     ALIAS(sqrt_llvm_fast, sqrt_llvm) \
     /*  pointer access */ \
     ADD_I(pointerref, 3) \
diff --git a/src/julia_internal.h b/src/julia_internal.h
index e0e2c8282799b..0b6b16bed095f 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -677,7 +677,6 @@ JL_DLLEXPORT jl_value_t *jl_floor_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_trunc_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_rint_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_sqrt_llvm(jl_value_t *a);
-JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index d69b1be21e6db..4f4d6bce39fd6 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -956,31 +956,6 @@ un_fintrinsic(trunc_float,trunc_llvm)
 un_fintrinsic(rint_float,rint_llvm)
 un_fintrinsic(sqrt_float,sqrt_llvm)
 
-JL_DLLEXPORT jl_value_t *jl_powi_llvm(jl_value_t *a, jl_value_t *b)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_value_t *ty = jl_typeof(a);
-    if (!jl_is_bitstype(ty))
-        jl_error("powi_llvm: a is not a bitstype");
-    if (!jl_is_bitstype(jl_typeof(b)) || jl_datatype_size(jl_typeof(b)) != 4)
-        jl_error("powi_llvm: b is not a 32-bit bitstype");
-    int sz = jl_datatype_size(ty);
-    jl_value_t *newv = jl_gc_alloc(ptls, sz, ty);
-    void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv);
-    switch (sz) {
-    /* choose the right size c-type operation */
-    case 4:
-        *(float*)pr = powf(*(float*)pa, (float)jl_unbox_int32(b));
-        break;
-    case 8:
-        *(double*)pr = pow(*(double*)pa, (double)jl_unbox_int32(b));
-        break;
-    default:
-        jl_error("powi_llvm: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
-    }
-    return newv;
-}
-
 JL_DLLEXPORT jl_value_t *jl_select_value(jl_value_t *isfalse, jl_value_t *a, jl_value_t *b)
 {
     JL_TYPECHK(isfalse, bool, isfalse);
diff --git a/test/math.jl b/test/math.jl
index b83a2bdba876c..e35ac7d016e89 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -996,6 +996,13 @@ end
     end
 end
 
+@testset "issue #19872" begin
+    f19872(x) = x ^ 3
+    @test issubnormal(2.0 ^ (-1024))
+    @test f19872(2.0) === 8.0
+    @test !issubnormal(0.0)
+end
+
 @test Base.Math.f32(complex(1.0,1.0)) == complex(Float32(1.),Float32(1.))
 @test Base.Math.f16(complex(1.0,1.0)) == complex(Float16(1.),Float16(1.))