From e5ddb686d54cf755f61ed4a7cd9f0195984cf170 Mon Sep 17 00:00:00 2001 From: Stephen Canon Date: Mon, 14 Jun 2021 16:22:32 -0400 Subject: [PATCH] Use the custom implementation of multipliedFullWidth on arm64_32 Previously we were falling back on the generic implementation for 64b integers, which resulted in the following codegen: 00000008 asr x8, x0, #32 0000000c asr x9, x0, #63 00000010 cmp x0, #0x0 00000014 cinv w10, w0, lt 00000018 eor w9, w10, w9 0000001c asr x10, x1, #32 00000020 asr x11, x1, #63 00000024 cmp x1, #0x0 00000028 cinv w12, w1, lt 0000002c eor w11, w12, w11 00000030 umull x12, w11, w9 00000034 mul x11, x11, x8 00000038 add x11, x11, x12, lsr #32 0000003c asr x12, x11, #63 00000040 cmp x11, #0x0 00000044 cinv w13, w11, lt 00000048 eor w12, w13, w12 0000004c madd x9, x9, x10, x12 00000050 mul x8, x10, x8 00000054 add x8, x8, x11, asr #32 00000058 add x0, x8, x9, asr #32 0000005c ret Instead, we should use the 64b implementation when targeting arm64_32, which allows us to generate: 00000008 smulh x0, x1, x0 0000000c ret Unsurprisingly, this is considerably faster. --- stdlib/public/core/IntegerTypes.swift.gyb | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/stdlib/public/core/IntegerTypes.swift.gyb b/stdlib/public/core/IntegerTypes.swift.gyb index 96f21a011976b..b4b2083b9df0d 100644 --- a/stdlib/public/core/IntegerTypes.swift.gyb +++ b/stdlib/public/core/IntegerTypes.swift.gyb @@ -1514,15 +1514,23 @@ ${assignmentOperatorComment(x.operator, True)} % end % dbits = bits*2 -% if bits <= word_bits: +% if bits == 64: + #if !(arch(arm) || arch(i386) || arch(wasm32)) + // On 32b architectures we fall back on the generic implementation, + // because LLVM doesn't know how to codegen the 128b multiply we use. + // + // Note that arm64_32 is a 64b architecture for the purposes of this + // check, because we have a 64x64 -> 128 multiply there (the actual + // ISA is AArch64). +% end /// Returns a tuple containing the high and low parts of the result of /// multiplying this value by the given value. /// /// Use this method to calculate the full result of a product that would /// otherwise overflow. Unlike traditional truncating multiplication, the - /// `multipliedFullWidth(by:)` method returns a tuple - /// containing both the `high` and `low` parts of the product of this value and - /// `other`. The following example uses this method to multiply two `UInt8` + /// `multipliedFullWidth(by:)` method returns a tuple containing both the + /// `high` and `low` parts of the product of this value and `other`. + /// The following example uses this method to multiply two `UInt8` /// values that normally overflow when multiplied: /// /// let x: UInt8 = 100 @@ -1557,6 +1565,8 @@ ${assignmentOperatorComment(x.operator, True)} let high = ${Self}(Builtin.truncOrBitCast_Int${dbits}_Int${bits}(shifted)) return (high: high, low: low) } +% if bits == 64: + #endif % end /// Returns a tuple containing the quotient and remainder of dividing the