Skip to content

Commit

Permalink
Auto merge of rust-lang#130387 - workingjubilee:rollup-1k3g708, r=workingjubilee
Browse files Browse the repository at this point in the history

Rollup of 3 pull requests

Successful merges:

 - rust-lang#130295 (Fix target-cpu fpu features on Armv8-R.)
 - rust-lang#130325 (Use -0.0 in `intrinsics::simd::reduce_add_unordered`)
 - rust-lang#130371 (Correctly account for niche-optimized tags in rustc_transmute)

r? `@ghost`
`@rustbot` modify labels: rollup
  • Loading branch information
bors committed Sep 15, 2024
2 parents bc486f3 + e5c03c2 commit 6d35464
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 40 deletions.
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2066,14 +2066,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
};
}

arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, 0.0);
arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, -0.0);
arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
arith_red!(
simd_reduce_add_unordered: vector_reduce_add,
vector_reduce_fadd_reassoc,
false,
add,
0.0
-0.0
);
arith_red!(
simd_reduce_mul_unordered: vector_reduce_mul,
Expand Down
12 changes: 6 additions & 6 deletions compiler/rustc_target/src/spec/targets/armv8r_none_eabihf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ pub(crate) fn target() -> Target {
linker: Some("rust-lld".into()),
relocation_model: RelocModel::Static,
panic_strategy: PanicStrategy::Abort,
// The Cortex-R52 has two variants with respect to floating-point support:
// 1. fp-armv8, SP-only, with 16 DP (32 SP) registers
// 2. neon-fp-armv8, SP+DP, with 32 DP registers
// Use the lesser of these two options as the default, as it will produce code
// compatible with either variant.
// Armv8-R requires a minimum set of floating-point features equivalent to:
// fp-armv8, SP-only, with 16 DP (32 SP) registers
// LLVM defines Armv8-R to include these features automatically.
//
// The Cortex-R52 supports these default features and optionally includes:
// neon-fp-armv8, SP+DP, with 32 DP registers
//
// Reference:
// Arm Cortex-R52 Processor Technical Reference Manual
// - Chapter 15 Advanced SIMD and floating-point support
features: "+fp-armv8,-fp64,-d32".into(),
max_atomic_width: Some(64),
emit_debug_gdb_scripts: false,
// GCC defaults to 8 for arm-none here.
Expand Down
35 changes: 25 additions & 10 deletions compiler/rustc_transmute/src/layout/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ pub(crate) mod rustc {
use rustc_middle::ty::{self, AdtDef, AdtKind, List, ScalarInt, Ty, TyCtxt, TypeVisitableExt};
use rustc_span::ErrorGuaranteed;
use rustc_target::abi::{
FieldIdx, FieldsShape, Layout, Size, TyAndLayout, VariantIdx, Variants,
FieldIdx, FieldsShape, Layout, Size, TagEncoding, TyAndLayout, VariantIdx, Variants,
};

use super::Tree;
Expand Down Expand Up @@ -319,11 +319,17 @@ pub(crate) mod rustc {
assert!(def.is_enum());

// Computes the variant of a given index.
let layout_of_variant = |index| {
let layout_of_variant = |index, encoding: Option<TagEncoding<VariantIdx>>| {
let tag = cx.tcx.tag_for_variant((cx.tcx.erase_regions(ty), index));
let variant_def = Def::Variant(def.variant(index));
let variant_layout = ty_variant(cx, (ty, layout), index);
Self::from_variant(variant_def, tag, (ty, variant_layout), layout.size, cx)
Self::from_variant(
variant_def,
tag.map(|tag| (tag, index, encoding.unwrap())),
(ty, variant_layout),
layout.size,
cx,
)
};

// We consider three kinds of enums, each demanding a different
Expand All @@ -345,9 +351,9 @@ pub(crate) mod rustc {
Variants::Single { index } => {
// `Variants::Single` on enums with variants denotes that
// the enum delegates its layout to the variant at `index`.
layout_of_variant(*index)
layout_of_variant(*index, None)
}
Variants::Multiple { tag_field, .. } => {
Variants::Multiple { tag, tag_encoding, tag_field, .. } => {
// `Variants::Multiple` denotes an enum with multiple
// variants. The layout of such an enum is the disjunction
// of the layouts of its tagged variants.
Expand All @@ -359,7 +365,7 @@ pub(crate) mod rustc {
let variants = def.discriminants(cx.tcx()).try_fold(
Self::uninhabited(),
|variants, (idx, ref discriminant)| {
let variant = layout_of_variant(idx)?;
let variant = layout_of_variant(idx, Some(tag_encoding.clone()))?;
Result::<Self, Err>::Ok(variants.or(variant))
},
)?;
Expand All @@ -380,7 +386,7 @@ pub(crate) mod rustc {
/// `0`.
fn from_variant(
def: Def<'tcx>,
tag: Option<ScalarInt>,
tag: Option<(ScalarInt, VariantIdx, TagEncoding<VariantIdx>)>,
(ty, layout): (Ty<'tcx>, Layout<'tcx>),
total_size: Size,
cx: LayoutCx<'tcx, TyCtxt<'tcx>>,
Expand All @@ -400,9 +406,18 @@ pub(crate) mod rustc {
let mut struct_tree = Self::def(def);

// If a `tag` is provided, place it at the start of the layout.
if let Some(tag) = tag {
size += tag.size();
struct_tree = struct_tree.then(Self::from_tag(tag, cx.tcx));
if let Some((tag, index, encoding)) = &tag {
match encoding {
TagEncoding::Direct => {
size += tag.size();
}
TagEncoding::Niche { niche_variants, .. } => {
if !niche_variants.contains(index) {
size += tag.size();
}
}
}
struct_tree = struct_tree.then(Self::from_tag(*tag, cx.tcx));
}

// Append the fields, in memory order, to the layout.
Expand Down
36 changes: 36 additions & 0 deletions tests/assembly/simd/reduce-fadd-unordered.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//@ revisions: x86_64 x86_64-avx2 aarch64
//@ assembly-output: emit-asm
//@ compile-flags: --crate-type=lib -O
//@[aarch64] only-aarch64
//@[x86_64] only-x86_64
//@[x86_64-avx2] only-x86_64
//@[x86_64-avx2] compile-flags: -Ctarget-cpu=x86-64-v3
#![feature(portable_simd)]
#![feature(core_intrinsics)]
use std::intrinsics::simd as intrinsics;
use std::simd::*;
// Regression test for https://github.com/rust-lang/rust/issues/130028
// This intrinsic produces much worse code if you use +0.0 instead of -0.0 because
// +0.0 isn't as easy to algebraically reassociate, even using LLVM's reassoc attribute!
// It would emit roughly one extra fadd, depending on the architecture.

// CHECK-LABEL: reduce_fadd_negative_zero
/// Reduces the four `f32` lanes of `v` to a single `f32` by calling the
/// unordered SIMD add-reduction intrinsic.
///
/// The comment directives inside the body pin the instruction sequence expected
/// from each test revision: a short run of add instructions, no `xorps` /
/// `vxorps` on the two x86 revisions, and no further add-family instruction
/// before the return.
///
/// NOTE(review): the function is `unsafe`, presumably because the intrinsic it
/// wraps is unsafe to call; no additional caller contract is visible here.
pub unsafe fn reduce_fadd_negative_zero(v: f32x4) -> f32 {
// x86_64: addps
// x86_64-NEXT: movaps
// x86_64-NEXT: shufps
// x86_64-NEXT: addss
// x86_64-NOT: xorps

// x86_64-avx2: vaddps
// x86_64-avx2-NEXT: vmovshdup
// x86_64-avx2-NEXT: vaddss
// x86_64-avx2-NOT: vxorps

// aarch64: faddp
// aarch64-NEXT: faddp

// Applies to every revision: once the sequence above has matched, nothing
// that looks like another add (add, fadd, addp, addps, addss, ...) may show
// up before the return instruction.
// CHECK-NOT: {{f?}}add{{p?s*}}
// CHECK: ret
intrinsics::simd_reduce_add_unordered(v)
}
22 changes: 0 additions & 22 deletions tests/crashes/123693.rs

This file was deleted.

9 changes: 9 additions & 0 deletions tests/ui/transmutability/enums/niche_optimization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,12 @@ fn no_niche() {
assert::is_transmutable::<Pair<V1, MaybeUninit<u8>>, OptionLike>();
assert::is_transmutable::<Pair<V2, MaybeUninit<u8>>, OptionLike>();
}

/// Runs the transmutability query from `u16` into a two-variant enum whose
/// variants both carry `bool` fields.
///
/// NOTE(review): presumably `Kind` gets no separate tag and its discriminant is
/// stored in a niche of one of the `bool` fields, which is the layout this test
/// is meant to exercise; confirm against the computed layout.
fn niche_fields() {
// No `#[repr]` attribute: variant `A` holds two `bool`s, variant `B` holds one.
enum Kind {
A(bool, bool),
B(bool),
}

// `assert::is_maybe_transmutable` is defined outside this view; presumably it
// requires the `u16` -> `Kind` transmute to be accepted when validity is
// assumed. TODO confirm against the helper's definition.
assert::is_maybe_transmutable::<u16, Kind>();
}

0 comments on commit 6d35464

Please sign in to comment.