[PowerPC] Fix codegen for transparent_union function params #101738

lei137 · 2024-08-02T19:12:12Z

Update codegen for function parameters that have the transparent_union attribute so that they are passed using the type of the first union member.

llvmbot · 2024-08-02T19:12:44Z

@llvm/pr-subscribers-backend-powerpc

@llvm/pr-subscribers-clang-codegen

Author: Lei Huang (lei137)

Changes

Update codegen for function parameters that have the transparent_union attribute so that they are passed using the type of the first union member.

Full diff: https://github.com/llvm/llvm-project/pull/101738.diff

5 Files Affected:

(modified) clang/lib/CodeGen/ABIInfoImpl.cpp (+7)
(modified) clang/lib/CodeGen/ABIInfoImpl.h (+4)
(modified) clang/lib/CodeGen/Targets/PPC.cpp (+20-6)
(added) clang/test/CodeGen/PowerPC/transparent_union.c (+54)
(added) llvm/test/CodeGen/PowerPC/transparent_union.ll (+67)

diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp
index 35e8f79ba1bac..d73b7e882fe65 100644
--- a/clang/lib/CodeGen/ABIInfoImpl.cpp
+++ b/clang/lib/CodeGen/ABIInfoImpl.cpp
@@ -143,13 +143,20 @@ bool CodeGen::classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
 }
 
 QualType CodeGen::useFirstFieldIfTransparentUnion(QualType Ty) {
+  bool IsTransparentUnion;
+  return useFirstFieldIfTransparentUnion(Ty, IsTransparentUnion);
+}
+
+QualType CodeGen::useFirstFieldIfTransparentUnion(QualType Ty, bool &TU) {
   if (const RecordType *UT = Ty->getAsUnionType()) {
     const RecordDecl *UD = UT->getDecl();
     if (UD->hasAttr<TransparentUnionAttr>()) {
       assert(!UD->field_empty() && "sema created an empty transparent union");
+      TU = true;
       return UD->field_begin()->getType();
     }
   }
+  TU = false;
   return Ty;
 }
 
diff --git a/clang/lib/CodeGen/ABIInfoImpl.h b/clang/lib/CodeGen/ABIInfoImpl.h
index 2a3ef6b8a6c96..95e48ee49d5a4 100644
--- a/clang/lib/CodeGen/ABIInfoImpl.h
+++ b/clang/lib/CodeGen/ABIInfoImpl.h
@@ -65,6 +65,10 @@ CGCXXABI::RecordArgABI getRecordArgABI(QualType T, CGCXXABI &CXXABI);
 bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
                         const ABIInfo &Info);
 
+// For transparent union types, return the type of the first element.
+// Set reference TU to true if Ty given was a transparent union.
+QualType useFirstFieldIfTransparentUnion(QualType Ty, bool &TU);
+
 /// Pass transparent unions as if they were the type of the first element. Sema
 /// should ensure that all elements of the union have the same "machine type".
 QualType useFirstFieldIfTransparentUnion(QualType Ty);
diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp
index e4155810963eb..d2a3abbe24861 100644
--- a/clang/lib/CodeGen/Targets/PPC.cpp
+++ b/clang/lib/CodeGen/Targets/PPC.cpp
@@ -196,7 +196,8 @@ ABIArgInfo AIXABIInfo::classifyReturnType(QualType RetTy) const {
 }
 
 ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const {
-  Ty = useFirstFieldIfTransparentUnion(Ty);
+  bool IsTransparentUnion;
+  Ty = useFirstFieldIfTransparentUnion(Ty, IsTransparentUnion);
 
   if (Ty->isAnyComplexType())
     return ABIArgInfo::getDirect();
@@ -217,8 +218,14 @@ ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const {
                                    /*Realign*/ TyAlign > CCAlign);
   }
 
-  return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
-                                     : ABIArgInfo::getDirect());
+  if (isPromotableTypeForABI(Ty))
+    return (IsTransparentUnion ?
+        ABIArgInfo::getExtend(Ty,
+            llvm::IntegerType::get(getVMContext(),
+                                   getContext().getTypeSize(Ty)))
+        : ABIArgInfo::getExtend(Ty));
+
+  return (ABIArgInfo::getDirect());
 }
 
 CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const {
@@ -822,7 +829,8 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough(
 
 ABIArgInfo
 PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
-  Ty = useFirstFieldIfTransparentUnion(Ty);
+  bool IsTransparentUnion;
+  Ty = useFirstFieldIfTransparentUnion(Ty, IsTransparentUnion);
 
   if (Ty->isAnyComplexType())
     return ABIArgInfo::getDirect();
@@ -891,8 +899,14 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
                                    /*Realign=*/TyAlign > ABIAlign);
   }
 
-  return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
-                                     : ABIArgInfo::getDirect());
+  if (isPromotableTypeForABI(Ty))
+    return (IsTransparentUnion ?
+        ABIArgInfo::getExtend(Ty,
+            llvm::IntegerType::get(getVMContext(),
+                                   getContext().getTypeSize(Ty)))
+        : ABIArgInfo::getExtend(Ty));
+
+  return ABIArgInfo::getDirect();
 }
 
 ABIArgInfo
diff --git a/clang/test/CodeGen/PowerPC/transparent_union.c b/clang/test/CodeGen/PowerPC/transparent_union.c
new file mode 100644
index 0000000000000..6c61ce553ba7d
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/transparent_union.c
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -O2 -target-cpu pwr7 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -O2 -target-cpu pwr7 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
+// RUN: %clang_cc1 -triple powerpc64-unknown-aix -O2 -target-cpu pwr7 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64
+// RUN: %clang_cc1 -triple powerpc-unknown-aix -O2 -target-cpu pwr7 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-AIX-32
+
+typedef union tu_c {
+	char a;
+	char b;
+} tu_c_t __attribute__((transparent_union));
+
+typedef union tu_s {
+	short a;
+} tu_s_t __attribute__((transparent_union));
+
+typedef union tu_us {
+	unsigned short a;
+} tu_us_t __attribute__((transparent_union));
+
+typedef union tu_l {
+	long a;
+} tu_l_t __attribute__((transparent_union));
+
+// CHECK-LABEL: define{{.*}} void @ftest0(
+// CHECK-SAME: i8 noundef signext [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+void ftest0(tu_c_t uc) { }
+
+// CHECK-LABEL: define{{.*}} void @ftest1(
+// CHECK-SAME: i16 noundef signext [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+void ftest1(tu_s_t uc) { }
+
+// CHECK-LABEL: define{{.*}} void @ftest2(
+// CHECK-SAME: i16 noundef zeroext [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+void ftest2(tu_us_t uc) { }
+
+// CHECK-64-LABEL: define{{.*}} void @ftest3(
+// CHECK-64-SAME: i64 [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-64-NEXT:  [[ENTRY:.*:]]
+// CHECK-64-NEXT:    ret void
+//
+// CHECK-AIX-32-LABEL: define void @ftest3(
+// CHECK-AIX-32-SAME: i32 [[UC_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-AIX-32-NEXT:  [[ENTRY:.*:]]
+// CHECK-AIX-32-NEXT:    ret void
+void ftest3(tu_l_t uc) { }
diff --git a/llvm/test/CodeGen/PowerPC/transparent_union.ll b/llvm/test/CodeGen/PowerPC/transparent_union.ll
new file mode 100644
index 0000000000000..d04a010737421
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/transparent_union.ll
@@ -0,0 +1,67 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -mcpu=pwr7 \
+; RUN:   -O2 -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-64
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux -mcpu=pwr7 \
+; RUN:   -O2 -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-64
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr7 \
+; RUN:   -O2 -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-64
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix -mcpu=pwr7 \
+; RUN:   -O2 -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AIX-32
+
+%union.tu_c = type { i8 }
+%union.tu_s = type { i16 }
+%union.tu_us = type { i16 }
+%union.tu_l = type { i64 }
+
+define void @ftest0(i8 noundef zeroext %uc.coerce) {
+; CHECK-LABEL: ftest0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stb 3, -1(1)
+; CHECK-NEXT:    blr
+entry:
+  %uc = alloca %union.tu_c, align 1
+  %coerce.dive = getelementptr inbounds %union.tu_c, ptr %uc, i32 0, i32 0
+  store i8 %uc.coerce, ptr %coerce.dive, align 1
+  ret void
+}
+
+define void @ftest1(i16 noundef signext %uc.coerce) {
+; CHECK-LABEL: ftest1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sth 3, -2(1)
+; CHECK-NEXT:    blr
+entry:
+  %uc = alloca %union.tu_s, align 2
+  %coerce.dive = getelementptr inbounds %union.tu_s, ptr %uc, i32 0, i32 0
+  store i16 %uc.coerce, ptr %coerce.dive, align 2
+  ret void
+}
+
+define void @ftest2(i16 noundef zeroext %uc.coerce) {
+; CHECK-LABEL: ftest2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sth 3, -2(1)
+; CHECK-NEXT:    blr
+entry:
+  %uc = alloca %union.tu_us, align 2
+  %coerce.dive = getelementptr inbounds %union.tu_us, ptr %uc, i32 0, i32 0
+  store i16 %uc.coerce, ptr %coerce.dive, align 2
+  ret void
+}
+
+define void @ftest3(i64 %uc.coerce) {
+; CHECK-64-LABEL: ftest3:
+; CHECK-64:       # %bb.0: # %entry
+; CHECK-64-NEXT:    std 3, -8(1)
+; CHECK-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: ftest3:
+; CHECK-AIX-32:       # %bb.0: # %entry
+; CHECK-AIX-32-NEXT:    stw 4, -4(1)
+; CHECK-AIX-32-NEXT:    stw 3, -8(1)
+; CHECK-AIX-32-NEXT:    blr
+entry:
+  %uc = alloca %union.tu_l, align 8
+  %coerce.dive = getelementptr inbounds %union.tu_l, ptr %uc, i32 0, i32 0
+  store i64 %uc.coerce, ptr %coerce.dive, align 8
+  ret void
+}

github-actions · 2024-08-02T19:15:57Z

✅ With the latest revision this PR passed the C/C++ code formatter.

hubert-reinterpretcast · 2024-08-05T01:21:44Z

Code changes seem missing for --target=powerpc-unknown-linux-gnu.

clang/lib/CodeGen/ABIInfoImpl.h

clang/lib/CodeGen/Targets/PPC.cpp

efriedma-quic · 2024-08-05T20:52:43Z

transparent_union generally means that the value is passed using the convention of the first member of the union. Why do you need to check for whether the value was originally a transparent union? Shouldn't the resulting convention be the same either way?

lei137 · 2024-08-05T21:08:52Z

> transparent_union generally means that the value is passed using the convention of the first member of the union. Why do you need to check for whether the value was originally a transparent union? Shouldn't the resulting convention be the same either way?

That would make sense... but to extend the transparent union type based on its first member, we have to call getExtend with the llvm::Type that we want to extend to. Otherwise it tries to extend the original union QualType. The static function ABIArgInfo getZeroExtend() that is called sets the ABIArgInfo to use the llvm::Type given; otherwise it seems to default to the original type if that optional param is nullptr.

efriedma-quic · 2024-08-06T02:59:44Z

Would it do any harm to just unconditionally compute the type and pass it into getExtend()?

lei137 · 2024-08-06T15:04:24Z

> Would it do any harm to just unconditionally compute the type and pass it into getExtend()?

This seems to cause issues for type _Bool, where it changes the function param from `i1 noundef zeroext %b1` to `i8 noundef zeroext %b1.coerce`.

$cat a.c
void fcall(_Bool);

_Bool test_wc_i1(_Bool b1, _Bool b2) {
  _Bool o=b1+b2;
  fcall(o);
  return o;
}

IR changes from:

target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
target triple = "powerpc64-unknown-linux-gnu"

; Function Attrs: nounwind
define dso_local zeroext i1 @test_wc_i1(i1 noundef zeroext %b1, i1 noundef zeroext %b2) local_unnamed_addr #0 {
entry:
  %0 = or i1 %b1, %b2
  tail call void @fcall(i1 noundef zeroext %0) #3
  ret i1 %0
}

declare void @fcall(i1 noundef zeroext) local_unnamed_addr #1

to:

target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
target triple = "powerpc64-unknown-linux-gnu"

; Function Attrs: nounwind
define dso_local noundef zeroext i1 @test_wc_i1(i8 noundef zeroext %b1.coerce, i8 noundef zeroext %b2.coerce) local_unnamed_addr #0 {
entry:
  %0 = or i8 %b2.coerce, %b1.coerce
  %1 = and i8 %0, 1
  %tobool = icmp ne i8 %1, 0
  tail call void @fcall(i8 noundef zeroext %1) #3
  ret i1 %tobool
}

declare void @fcall(i8 noundef zeroext) local_unnamed_addr #1

This is probably due to the fact that I am getting the coerce type via:

llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(),  getContext().getTypeSize(Ty));

Not sure if there is a more general way to do this?

lei137 · 2024-08-06T16:33:01Z

I was thinking it would make the code cleaner if we can do something like this instead:

 // For transparent union types, return the type of the first element.
-// Set TU to true if Ty given was a transparent union and to false otherwise.
+// and set CTy the integer type of the first union element.  CTy defaults to nullptr.
-QualType CodeGen::useFirstFieldIfTransparentUnion(QualType Ty, bool &TU) {
+QualType CodeGen::useFirstFieldIfTransparentUnion(QualType Ty,
+                                                  llvm::Type *CTy) {
   if (const RecordType *UT = Ty->getAsUnionType()) {
     const RecordDecl *UD = UT->getDecl();
     if (UD->hasAttr<TransparentUnionAttr>()) {
       assert(!UD->field_empty() && "sema created an empty transparent union");
-      TU = true;
-      return UD->field_begin()->getType();
+      QualType UTy = UD->field_begin()->getType();
+      *CTy = llvm::IntegerType::get(getVMContext(),
+                                    getContext().getTypeSize(UTy));
+      return UTy;
     }
   }
-  TU = false;
   return Ty;
 }

Then we can just call getExtend(Ty, CTy) by default. But I'm not sure how to get access to getVMContext() and getContext() without passing more required params to this function ...

lei137 · 2024-08-16T00:04:00Z

It seems CGT.ConvertType(Ty) does what was needed. I've simplified the patch to use that instead.
@efriedma-quic FYI, this is more in line with your suggestion. Thank you!

amy-kwan

I'm not an expert here, but I think the change you made makes sense since the Ty is supposed to be the first field.

Also, might be a dumb question, I saw we have 32-bit Linux run lines, but is that still worth testing?

lei137 · 2024-08-16T19:36:06Z

> I'm not an expert here, but I think the change you made makes sense since the Ty is supposed to be the first field.
>
> Also, might be a dumb question, I saw we have 32-bit Linux run lines, but is that still worth testing?

Thanks for taking a look @amy-kwan. The 32-bit Linux run line was added in response to a review comment from Hubert: #101738 (comment)

efriedma-quic

LGTM

clang/test/CodeGen/PowerPC/transparent_union.c

llvm/test/CodeGen/PowerPC/transparent_union.ll

Update codegen for func param with transparent_union attr to be that of the first union member.

…parentUnion()

* 'main' of https://github.com/llvm/llvm-project: (1385 commits) [llvm][NVPTX] Fix quadratic runtime in ProxyRegErasure (#105730) [ScalarizeMaskedMemIntr] Don't use a scalar mask on GPUs (#104842) [clang][NFC] order C++ standards in reverse in release notes (#104866) Revert "[clang] Merge lifetimebound and GSL code paths for lifetime analysis (#104906)" (#105752) [SandboxIR] Implement CatchReturnInst (#105605) [RISCV][TTI] Use legalized element types when costing casts (#105723) [LTO] Use a helper function to add a definition (NFC) (#105721) [Vectorize] Fix a warning Revert "[clang][rtsan] Introduce realtime sanitizer codegen and drive… (#105744) [NFC][ADT] Add reverse iterators and `value_type` to StringRef (#105579) [mlir][SCF]-Fix loop coalescing with iteration arguements (#105488) [compiler-rt][test] Change tests to remove the use of `unset` command in lit internal shell (#104880) [Clang] [Parser] Improve diagnostic for `friend concept` (#105121) [clang][rtsan] Introduce realtime sanitizer codegen and driver (#102622) [libunwind] Stop installing the mach-o module map (#105616) [VPlan] Fix typo in cb4efe1d. [VPlan] Don't trigger VF assertion if VPlan has extra simplifications. [LLD][COFF] Generate X64 thunks for ARM64EC entry points and patchable functions. (#105499) [VPlan] Factor out precomputing costs from LVP::cost (NFC). AMDGPU: Remove global/flat atomic fadd intrinics (#97051) [LTO] Introduce helper functions to add GUIDs to ImportList (NFC) (#105555) Revert "[MCA][X86] Add missing 512-bit vpscatterqd/vscatterqps schedu… (#105716) [libc] Fix locale structs with old headergen [libc] Add `ctype.h` locale variants (#102711) [NFC] [MLIR] [OpenMP] Fixing typo of clause. 
(#105712) [AMDGPU] Correctly insert s_nops for dst forwarding hazard (#100276) Fix dap stacktrace perf issue (#104874) [HLSL][SPIRV]Add SPIRV generation for HLSL dot (#104656) [libc] Fix leftover thread local [NFC] [Docs] add missing space [libc] Initial support for 'locale.h' in the LLVM libc (#102689) Revert " [libc] Add `ctype.h` locale variants (#102711)" [libc] Add `ctype.h` locale variants (#102711) [libc++] Fix transform_error.mandates.verify.cpp test on msvc (#104635) [VPlan] Move EVL memory recipes to VPlanRecipes.cpp (NFC) [Xtensa,test] Fix div.ll after #99981 [MCA][X86] Add missing 512-bit vpscatterqd/vscatterqps schedule data [MCA][X86] Add scatter instruction test coverage for #105675 [IR] Simplify comparisons with std::optional (NFC) (#105624) Recommit "[FunctionAttrs] deduce attr `cold` on functions if all CG paths call a `cold` function" [lldb] Change the two remaining SInt64 settings in Target to uint (#105460) [libc++] Adjust armv7 XFAIL target triple for the setfill_wchar_max test. (#105586) [clang][bytecode] Fix 'if consteval' in non-constant contexts (#104707) [NFC] [SCCP] remove unused functions (#105603) [WebAssembly] Change half-precision feature name to fp16. (#105434) [C23] Remove WG14 N2517 from the status page [bindings][ocaml] Add missing AtomicRMW operations (#105673) [MCA][X86] Add scatter instruction test coverage for #105675 [Driver] Add -Wa, options -mmapsyms={default,implicit} [CodeGen] Construct SmallVector with iterator ranges (NFC) (#105622) [lldb] Fix typos in ScriptedInterface.h [AMDGPU][GlobalISel] Disable fixed-point iteration in all Combiners (#105517) [AArch64,ELF] Allow implicit $d/$x at section beginning [AArch64] Fix a warning [Vectorize] Fix warnings Reland "[asan] Remove debug tracing from `report_globals` (#104404)" (#105601) [X86] Add BSR/BSF tests to check for implicit zero extension [AArch64] Lower aarch64_neon_saddlv via SADDLV nodes. 
(#103307) [lldb][test] Add a unit-test for importRecordLayoutFromOrigin [ARM] Fix missing ELF FPU attributes for fp-armv8-fullfp16-d16 (#105677) [lldb] Pick the correct architecutre when target and core file disagree (#105576) [Verifier] Make lrint and lround intrinsic cases concise. NFC (#105676) [SLP]Improve/fix subvectors in gather/buildvector nodes handling [DwarfEhPrepare] Assign dummy debug location for more inserted _Unwind_Resume calls (#105513) [RISCV][GISel] Implement canLowerReturn. (#105465) [AMDGPU] Generate checks for vector indexing. NFC. (#105668) [NFC] Replace bool <= bool comparison (#102948) [SLP]Do not count extractelement costs in unreachable/landing pad blocks. [SimplifyCFG] Fold switch over ucmp/scmp to icmp and br (#105636) [libc++] Post-LLVM19-release docs cleanup (#99667) [AArch64] optimise SVE cmp intrinsics with no active lanes (#104779) [RISCV] Introduce local peephole to reduce VLs based on demanded VL (#104689) [DAG][RISCV] Use vp_reduce_* when widening illegal types for reductions (#105455) [libc++][docs] Major update to the documentation [InstCombine] Handle logical op for and/or of icmp 0/-1 [InstCombine] Add more test variants with poison elements (NFC) [LLVM][CodeGen][SVE] Increase vector.insert test coverage. 
[PowerPC] Fix mask for __st[d/w/h/b]cx builtins (#104453) [Analysis] Teach ScalarEvolution::getRangeRef about more dereferenceable objects (#104778) [mlir][LLVM] Add support for constant struct with multiple fields (#102752) [mlir][OpenMP][NFC] clean up optional reduction region parsing (#105644) [InstCombine] Add more tests for foldLogOpOfMaskedICmps transform (NFC) [clang][bytecode] Allow adding offsets to function pointers (#105641) [Clang][Sema] Rebuild template parameters for out-of-line template definitions and partial specializations (#104030) [InstCombine] Fold `scmp(x -nsw y, 0)` to `scmp(x, y)` (#105583) [flang][OpenMP] use reduction alloc region (#102525) [mlir][OpenMP] Convert reduction alloc region to LLVMIR (#102524) [mlir][OpenMP] Add optional alloc region to reduction decl (#102522) [libc++] Add link to the Github conformance table from the documentation [libc++] Fix the documentation build [NFC][SetTheory] Refactor to use const pointers and range loops (#105544) [NFC][VPlan] Correct two typos in comments. [clang][bytecode] Fix void unary * operators (#105640) Revert "[lldb] Extend frame recognizers to hide frames from backtraces (#104523)" Revert "[lldb-dap] Mark hidden frames as "subtle" (#105457)" Revert "[lldb][swig] Use the correct variable in the return statement" [DebugInfo][NFC] Constify debug DbgVariableRecord::{isDbgValue,isDbgDeclare} (#105570) [cmake] Include GNUInstallDirs before using variables defined by it. (#83807) [AMDGPU] GFX12 VMEM loads can write VGPR results out of order (#105549) [AMDGPU] Add GFX12 test coverage for vmcnt flushing in loop headers (#105548) [AArch64][GlobalISel] Libcall i128 srem/urem and scalarize more vector types. [AArch64] Add GISel srem/urem tests of various sizes. 
NFC LSV: forbid load-cycles when vectorizing; fix bug (#104815) [X86] Allow speculative BSR/BSF instructions on targets with CMOV (#102885) [lit] Fix substitutions containing backslashes (#103042) [Dexter] Sanitize user details from git repo URL in dexter --version (#105533) [SimplifyCFG] Add tests for switch over cmp intrinsic (NFC) [libc++] Refactor the std::unique_lock tests (#102151) Fix logf128 tests to allow negative NaNs from (#104929) [MemCpyOpt] Avoid infinite loops in `MemCpyOptPass::processMemCpyMemCpyDependence` (#103218) [mlir][dataflow] Propagate errors from `visitOperation` (#105448) Enable logf128 constant folding for hosts with 128bit long double (#104929) [mlir][llvmir][debug] Correctly generate location for phi nodes. (#105534) [Sparc] Add flags to enable errata workaround pass for GR712RC and UT700 (#104742) [lldb][AIX] Updating XCOFF,PPC entry in LLDB ArchSpec (#105523) [mlir][cuda] NFC: Remove accidentally committed 'asd' file. (#105491) [clang] Merge lifetimebound and GSL code paths for lifetime analysis (#104906) [Xtensa] Implement lowering Mul/Div/Shift operations. (#99981) [clang][bytecode] Don't discard all void-typed expressions (#105625) Build SanitizerCommon if ctx_profile enabled (#105495) [InstCombine] Fold icmp over select of cmp more aggressively (#105536) [SPIR-V] Rework usage of virtual registers' types and classes (#104104) [ELF] Move target to Ctx. NFC [Transforms] Refactor CreateControlFlowHub (#103013) [asan][Darwin] Simplify test (#105599) [Transforms] Construct SmallVector with iterator ranges (NFC) (#105607) [Flang][Runtime] Fix type used to store result of typeInfo::Value::Ge… (#105589) [PGO][OpenMP] Instrumentation for GPU devices (Revision of #76587) (#102691) [clang][NFC] remove resolved issue from StandardCPlusPlusModules.rst (#105610) AMDGPU: Handle folding frame indexes into s_add_i32 (#101694) [RISCV][GISel] Correct registers classes in vector anyext.mir test. NFC [ELF] Move script into Ctx. 
NFC [ELF] LinkerScript: initialize dot. NFC [RISCV][GISel] Correct registers classes in vector sext/zext.mir tests. NFC [ELF] Remove unneeded script->. NFC [ELF] Move mainPart to Ctx. NFC [Symbolizer, DebugInfo] Clean up LLVMSymbolizer API: const string& -> StringRef (#104541) [flang][NFC] Move OpenMP related passes into a separate directory (#104732) [RISCV] Add CSRs and an instruction for Smctr and Ssctr extensions. (#105148) [SandboxIR] Implement FuncletPadInst, CatchPadInst and CleanupInst (#105294) [lldb-dap] Skip the lldb-dap output test on windows, it seems all the lldb-dap tests are disabled on windows. (#105604) [libc] Fix accidentally using system file on GPU [llvm][nsan] Skip function declarations (#105598) Handle #dbg_values in SROA. (#94070) Revert "Speculative fix for asan/TestCases/Darwin/cstring_section.c" [BPF] introduce __attribute__((bpf_fastcall)) (#105417) [SandboxIR] Simplify matchers in ShuffleVectorInst unit test (NFC) (#105596) [compiler-rt][test] Added REQUIRES:shell to fuzzer test with for-loop (#105557) [ctx_prof] API to get the instrumentation of a BB (#105468) [lldb] Speculative fix for trap_frame_sym_ctx.test [LTO] Compare std::optional<ImportKind> directly with ImportKind (NFC) (#105561) [LTO] Use enum class for ImportFailureReason (NFC) (#105564) [flang][runtime] Add build-time flags to runtime to adjust SELECTED_x_KIND() (#105575) [libc] Add `scanf` support to the GPU build (#104812) [SandboxIR] Add tracking for `ShuffleVectorInst::setShuffleMask`. (#105590) [NFC][TableGen] Change global variables from anonymous NS to static (#105504) [SandboxIR] Fix use-of-uninitialized in ShuffleVectorInst unit test. 
(#105592) [InstCombine] Fold `sext(A < B) + zext(A > B)` into `ucmp/scmp(A, B)` (#103833) Revert "[Coroutines] [NFCI] Don't search the DILocalVariable for __promise when constructing the debug varaible for __coro_frame" Revert "[Coroutines] Fix -Wunused-variable in CoroFrame.cpp (NFC)" Revert "[Coroutines] Salvage the debug information for coroutine frames within optimizations" [mlir] Add nodiscard attribute to allowsUnregisteredDialects (#105530) [libc++] Mark LWG3404 as implemented [lldb-dap] When sending a DAP Output Event break each message into separate lines. (#105456) [RFC][flang] Replace special symbols in uniqued global names. (#104859) [libc++] Improve the granularity of status tracking from Github issues [ADT] Add `isPunct` to StringExtras (#105461) [SandboxIR] Add ShuffleVectorInst (#104891) [AArch64] Add SVE lowering of fixed-length UABD/SABD (#104991) [SLP]Try to keep scalars, used in phi nodes, if phi nodes from same block are vectorized. [SLP]Fix PR105120: fix the order of phi nodes vectorization. [CGData] Fix tests for sed without using options (#105546) [flang][OpenMP] Follow-up to build-breakage fix (#102028) [NFC][ADT] Remove << operators from StringRefTest (#105500) [lldb-dap] Implement `StepGranularity` for "next" and "step-in" (#105464) [Docs] Update Loop Optimization WG call. [gn build] Port a6bae5cb3791 [AMDGPU] Split GCNSubtarget into its own file. NFC. (#105525) [ctx_prof] Profile flatterner (#104539) [libc][docs] Update docs to reflect new headergen (#102381) [clang] [test] Use lit Syntax for Environment Variables in Clang subproject (#102647) [RISCV] Minor style fixes in lowerVectorMaskVecReduction [nfc] [libc++] Standardize how we track removed and superseded papers [libc++][NFC] A few mechanical adjustments to capitalization in status files [LLDB][Minidump] Fix ProcessMinidump::GetMemoryRegions to include 64b regions when /proc/pid maps are missing. (#101086) Scalarize the vector inputs to llvm.lround intrinsic by default. 
(#101054) [AArch64] Set scalar fneg to free for fnmul (#104814) [libcxx] Add cache file for the GPU build (#99348) [Offload] Improve error reporting on memory faults (#104254) [bazel] Fix mlir build broken by 681ae097. (#105552) [CGData] Rename CodeGenDataTests to CGDataTests (#105463) [ELF,test] Enhance hip-section-layout.s [clang-format] Use double hyphen for multiple-letter flags (#100978) [mlir] [tablegen] Make `hasSummary` and `hasDescription` useful (#105531) [flang][Driver] Remove misleading test comment (#105528) [MLIR][OpenMP] Add missing OpenMP to LLVM conversion patterns (#104440) [flang][debug] Allow non default array lower bounds. (#104467) [DAGCombiner] Fix ReplaceAllUsesOfValueWith mutation bug in visitFREEZE (#104924) Fix bug with -ffp-contract=fast-honor-pragmas (#104857) [RISCV] Add coverage for fp reductions of <2^N-1 x FP> vectors [AMDGPU][True16][MC] added VOPC realtrue/faketrue flag and fake16 instructions (#104739) [libc++] Enable C++23 and C++26 issues to be synchronized [gn] port 7ad7f8f7a3d4 Speculative fix for asan/TestCases/Darwin/cstring_section.c [libc++] Mark C++14 as complete and remove the status pages (#105514) [AArch64] Bail out for scalable vecs in areExtractShuffleVectors (#105484) [LTO] Use a range-based for loop (NFC) (#105467) [LTO] Use DenseSet in computeLTOCacheKey (NFC) (#105466) Revert "[flang][NFC] Move OpenMP related passes into a separate directory (#104732)" [AArch64] Add support for ACTLR_EL12 system register (#105497) [InstCombine] Add tests for icmp of select of cmp (NFC) [NFC][ADT] Format StringRefTest.cpp to fit in 80 columns. 
(#105502) [flang][NFC] Move OpenMP related passes into a separate directory (#104732) [libcxx] Add `LIBCXX_HAS_TERMINAL_AVAILABLE` CMake option to disable `print` terminal checks (#99259) [clang] Diagnose functions with too many parameters (#104833) [mlir][memref]: Allow collapse dummy strided unit dim (#103719) [lldb][swig] Use the correct variable in the return statement [libc++] Avoid -Wzero-as-null-pointer-constant in operator<=> (#79465) [llvm-reduce] Disable fixpoint verification in InstCombine [libc++] Refactor the tests for mutex, recursive mutex and their timed counterparts (#104852) [Clang] fix generic lambda inside requires-clause of friend function template (#99813) Revert "[asan] Remove debug tracing from `report_globals` (#104404)" [analyzer] Limit `isTainted()` by skipping complicated symbols (#105493) [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. (#102776) [InstCombine] Extend Fold of Zero-extended Bit Test (#102100) [LLVM][VPlan] Keep all VPBlend masks until VPlan transformation. (#104015) [gn build] Port 0cff3e85db00 [NFC][Support] Move ModRef/MemoryEffects printers to their own file (#105367) [NFC][ADT] Add unit test for llvm::mismatch. (#105459) LAA: pre-commit tests for stride-versioning (#97570) [VPlan] Only use selectVectorizationFactor for cross-check (NFCI). 
(#103033) [SPIR-V] Sort basic blocks to match the SPIR-V spec (#102929) [DAG] Add select_cc -> abd folds (#102137) [MLIR][mesh] moving shardinginterfaceimpl for tensor to tensor extension lib (#104913) AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (#97050) [InstCombine] Remove some of the complexity-based canonicalization (#91185) [PS5][Driver] Link main components with -pie by default (#102901) [bazel] Port a3d41879ecf5690a73f9226951d3856c7faa34a4 [gn build] Port 6c189eaea994 [Clang][NFCI] Cleanup the fix for default function argument substitution (#104911) [AMDGPU][True16][test] added missing true16 flag in gfx12 asm vop1 (#104884) [RISCV] Make EmitRISCVCpuSupports accept multiple features (#104917) [AArch64] Add SME peephole optimizer pass (#104612) [RISCV] Remove experimental for Ssqosid ext (#105476) Revert "[LLVM] [X86] Fix integer overflows in frame layout for huge frames (#101840)" [llvm][test] Write temporary files into a temporary directory [GlobalIsel] Push cast through build vector (#104634) [Clang] Implement CWG2351 `void{}` (#78060) [VPlan] Introduce explicit ExtractFromEnd recipes for live-outs. (#100658) [gn build] Port 7c4cadfc4333 [mlir][vector] Add more tests for ConvertVectorToLLVM (5/n) (#104784) [mlir][Linalg] Bugfix for folder of `linalg.transpose` (#102888) [RISCV] Add Hazard3 Core as taped out for RP2350 (#102452) [X86][AVX10.2] Support AVX10.2-CONVERT new instructions. (#101600) [Flang][Runtime] Handle missing definitions in <cfenv> (#101242) [compiler-rt] Reland "SetThreadName implementation for Fuchsia" (#105179) [LAA] Collect loop guards only once in MemoryDepChecker (NFCI). [ELF] Move ppc64noTocRelax to Ctx. 
NFC [clang-repl] Fix printing preprocessed tokens and macros (#104964) [mlir][ODS] Optionally generate public C++ functions for type constraints (#104577) [Driver] Use llvm::make_range(std::pair) (NFC) (#105470) Revert "[AArch64] Optimize when storing symmetry constants" (#105474) [llvm][DWARFLinker] Don't attach DW_AT_dwo_id to CUs (#105186) [lldb-dap] Mark hidden frames as "subtle" (#105457) [clang][bytecode] Fix diagnostic in final ltor cast (#105292) [clang-repl] [codegen] Reduce the state in TBAA. NFC for static compilation. (#98138) [CMake] Update CMake cache file for the ARM/Aarch64 cross toolchain builds. NFC. (#103552) Revert "[FunctionAttrs] deduce attr `cold` on functions if all CG paths call a `cold` function" [AMDGPU] Update instrumentAddress method to support aligned size and unusual size accesses. (#104804) [BOLT] Improve BinaryFunction::inferFallThroughCounts() (#105450) [lldb][test] Workaround older systems that lack gettid (#104831) [LTO] Teach computeLTOCacheKey to return std::string (NFC) (#105331) [gn build] Port c8a678b1e486 [gn build] Port 55d744eea361 [ELF,test] Improve error-handling-script-linux.test [gn] tblgen opts for llvm-cgdata [MLIR][MathDialect] fix fp32 promotion crash when encounters scf.if (#104451) Reland "[gn build] Port d3fb41dddc11 (llvm-cgdata)" RISC-V: Add fminimumnum and fmaximumnum support (#104411) [mlir] Fix -Wunused-result in ElementwiseOpFusion.cpp (NFC) [RISCV][GISel] Merge RISCVCallLowering::lowerReturnVal into RISCVCallLowering::lowerReturn. NFC [AArch64] Basic SVE PCS support for handling scalable vectors on Darwin. Fix KCFI types for generated functions with integer normalization (#104826) [RISCV] Add coverage for int reductions of <3 x i8> vectors Revert "[RISCV][GISel] Allow >2*XLen integers in isSupportedReturnType." [DirectX] Register a few DXIL passes with the new PM [RISCV][GISel] Allow >2*XLen integers in isSupportedReturnType. 
[mlir][linalg] Improve getPreservedProducerResults estimation in ElementwiseOpFusion (#104409) [lldb] Extend frame recognizers to hide frames from backtraces (#104523) [RISCV][GISel] Split LoadStoreActions in LoadActions and StoreActions. [lldb][test] XFAIL TestAnonNamespaceParamFunc.cpp on Windows [FunctionAttrs] deduce attr `cold` on functions if all CG paths call a `cold` function [FunctionAttrs] Add tests for deducing attr `cold` on functions; NFC [DXIL][Analysis] Update test to match comment. NFC (#105409) [flang] Fix test on ppc64le & aarch64 (#105439) [bazel] Add missing dependencies for c8a678b1e4863df2845b1305849534047f10caf1 [RISCV][GISel] Remove s32 support for G_ABS on RV64. [TableGen] Rework `EmitIntrinsicToBuiltinMap` (#104681) [libc] move newheadergen back to safe_load (#105374) [cmake] Set up llvm-ml as ASM_MASM tool in WinMsvc.cmake (#104903) [libc] Include startup code when installing all (#105203) [DAG][RISCV] Use vp.<binop> when widening illegal types for binops which can trap (#105214) [BOLT] Reduce CFI warning verbosity (#105336) [flang] Disable part of failing test (temporary) (#105350) AMDGPU: Temporarily stop adding AtomicExpand to new PM passes [OpenMP] Temporarily disable test to keep bots green [Clang] Re-land Overflow Pattern Exclusions (#104889) [RISCV][GISel] Remove s32 support on RV64 for DIV, and REM. 
(#102519) [flang] Disable failing test (#105327) [NFC] Fix a typo in InternalsManual: ActOnCXX -> ActOnXXX (#105207) [NFC] Fixed two typos: "__builin_" --> "__builtin_" (#98782) [flang] Re-enable date_and_time intrinsic test (NFC) (#104967) [clang] Support -Wa, options -mmsa and -mno-msa (#99615) AMDGPU/NewPM: Start filling out addIRPasses (#102884) AMDGPU/NewPM: Fill out passes in addCodeGenPrepare (#102867) [SandboxIR] Implement CatchSwitchInst (#104652) clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins (#96876) clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins (#96875) clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (#96874) [Driver,DXIL] Fix build [Attributor] Improve AAUnderlyingObjects (#104835) [flang] Fix IEEE_NEAREST_AFTER folding edge cases (#104846) [flang] Silence spurious error (#104821) [flang] Silence an inappropriate warning (#104685) [flang] Fix inheritance of IMPLICIT typing rules (#102692) [flang] More support for anonymous parent components in struct constr… (#102642) clang/AMDGPU: Emit atomicrmw from {global|flat}_atomic_fadd_v2f16 builtins (#96873) [lldb][test] Change unsupported cat -e to cat -v to work with lit internal shell (#104878) [llvm-lit][test] Updated built-in cat command tests (#104473) [mlir][gpu] Add extra value types for gpu::ShuffleOp (#104605) [AArch64][MachO] Add ptrauth ABI version to arm64e cpusubtype. (#104650) [libc++] Fix several double-moves in the code base (#104616) [lldb] Disable the API test TestCppBitfields on Windows (#105037) llvm.lround: Update verifier to validate support of vector types. (#98950) [mlir][sparse] support sparsification to coiterate operations. (#102546) Fix post-104491 (#105191) [mlir][tablegen] Fix tablegen bug with `Complex` class (#104974) [DirectX] Encapsulate DXILOpLowering's state into a class. 
NFC [ctx_prof] Add analysis utility to fetch ID of a callsite (#104491) [lldb] Fix windows debug build after 9d07f43 (#104896) [lldb][ClangExpressionParser] Implement ExternalSemaSource::ReadUndefinedButUsed (#104817) Revert "[compiler-rt][fuzzer] implements SetThreadName for fuchsia." (#105162) [lldb][ClangExpressionParser] Don't leak memory when multiplexing ExternalASTSources (#104799) [mlir][gpu] Add 'cluster_size' attribute to gpu.subgroup_reduce (#104851) [mlir][spirv] Support `gpu` in `convert-to-spirv` pass (#105010) [libc++][chono] Use hidden friends for leap_second comparison. (#104713) [OpenMP] Map `omp_default_mem_alloc` to global memory (#104790) [NFC][TableGen] Elminate use of isalpha/isdigit from TGLexer (#104837) [HLSL] Implement support for HLSL intrinsic - saturate (#104619) [RISCV] Add isel optimization for (and (sra y, c2), c1) to recover regression from #101751. (#104114) [bazel] Add missing deps in {Arith,DLTI}DialectTdFiles (#105091) [bazel] Port bf68e9047f62c22ca87f9a4a7c59a46b3de06abb (#104907) [Clang] CWG722: nullptr to ellipses (#104704) [RISCV] Add coverage for VP div[u]/rem[u] with non-power-of-2 vectors Recommit "[CodeGenPrepare] Folding `urem` with loop invariant value" [CodeGenPrepare][X86] Add tests for fixing `urem` transform; NFC Fix a warning for -Wcovered-switch-default (#105054) [OpenMP][FIX] Check for requirements early (#104836) [mlir] [irdl] Improve IRDL documentation (#104928) [CMake] Remove HAVE_LINK_H [Support] Remove unneeded __has_include fallback [docs] Fix typo in llvm.experimental.vector.compress code-block snippet [clang][ASTMatcher] Fix execution order of hasOperands submatchers (#104148) InferAddressSpaces: Factor replacement loop into function [NFC] (#104430) [DXIL][Analysis] Delete unnecessary test (#105025) [MLIR][EmitC] Allow ptrdiff_t as result in sub op (#104921) [NFC] Remove explicit bitcode enumeration from BitCodeFormat.rst (#102618) [NVPTX] Add elect.sync Intrinsic (#104780) [AMDGPU] Move 
AMDGPUMemoryUtils out of Utils. NFC. (#104930) [clang][OpenMP] Fix typo in comment, NFC [AArch64] fix buildbot by removing dead code [llvm-cgdata] Fix -Wcovered-switch-default (NFC) Reenable anon structs (#104922) [DXIL][Analysis] Add validator version to info collected by Module Metadata Analysis (#104828) Reland [CGData] llvm-cgdata #89884 (#101461) [CostModel][X86] Add missing costkinds for scalar CTLZ/CTTZ instructions [Driver] Make ffp-model=fast honor non-finite-values, introduce ffp-model=aggressive (#100453) [InstCombine] Thwart complexity-based canonicalization in test (NFC) [AArch64] Extend sxtw peephole to uxtw. (#104516) Reapply "[CycleAnalysis] Methods to verify cycles and their nesting. (#102300)" [AArch64] Optimize when storing symmetry constants (#93717) [lldb][Windows] Fixed the API test breakpoint_with_realpath_and_source_map (#104918) [SPARC] Remove assertions in printOperand for inline asm operands (#104692) [llvm][offload] Move AMDGPU offload utilities to LLVM (#102487) [AArch64][NEON] Extend faminmax patterns with fminnm/fmaxnm (#104766) [AArch64] Remove TargetParser CPU/Arch feature tests (#104587) [InstCombine] Adjust fixpoint error message (NFC) [LLVM] Add a C API for creating instructions with custom syncscopes. (#104775) [llvm-c] Add getters for LLVMContextRef for various types (#99087) [clang][NFC] Split invalid-cpu-note tests (#104601) [X86][AVX10] Fix unexpected error and warning when using intrinsic (#104781) [ScheduleDAG] Dirty height/depth in addPred/removePred even for latency zero (#102915) [gn build] Port 42067f26cd08 [X86] Use correct fp immediate types in _mm_set_ss/sd [X86] Add clang codegen test coverage for #104848 [SimplifyCFG] Add support for hoisting commutative instructions (#104805) [clang][bytecode] Fix discarding CompoundLiteralExprs (#104909) Revert "[CycleAnalysis] Methods to verify cycles and their nesting. 
(#102300)" [LLVM-Reduce] - Distinct Metadata Reduction (#104624) [clang][modules] Built-in modules are not correctly enabled for Mac Catalyst (#104872) [MLIR][DLTI] Introduce DLTIQueryInterface and impl for DLTI attrs (#104595) [Flang][OpenMP] Prevent re-composition of composite constructs (#102613) [BasicAA] Use nuw attribute of GEPs (#98608) [CycleAnalysis] Methods to verify cycles and their nesting. (#102300) [mlir][EmitC] Model lvalues as a type in EmitC (#91475) [mlir][EmitC] Do not convert illegal types in EmitC (#104571) [Clang][test] Add bytecode interpreter tests for floating comparison functions (#104703) [clang][bytecode] Fix initializing base casts (#104901) [mlir][ArmSME][docs] Update example (NFC) [llvm][GitHub] Fix formatting of new contributor comments [Coroutines] Salvage the debug information for coroutine frames within optimizations [lldb][AIX] 1. Avoid namespace collision on other platforms (#104679) [MLIR][Bufferize][NFC] Fix documentation typo (#104881) [LV] Simplify !UserVF.isZero() -> UserVF (NFC). [DataLayout] Refactor the rest of `parseSpecification` (#104545) [LLD][COFF] Detect weak reference cycles. (#104463) [MLIR][Python] remove unused init python file (#104890) [clang-doc] add support for block commands in clang-doc html output (#101108) [Coroutines] Fix -Wunused-variable in CoroFrame.cpp (NFC) [IR] Check that arguments of naked function are not used (#104757) [Coroutines] [NFCI] Don't search the DILocalVariable for __promise when constructing the debug varaible for __coro_frame [MLIR] Introduce a SelectLikeOpInterface (#104751) Revert "[scudo] Add partial chunk heuristic to retrieval algorithm." (#104894) [NVPTX] Fix bugs involving maximum/minimum and bf16 [SelectionDAG] Fix lowering of IEEE 754 2019 minimum/maximum [llvm-objcopy][WebAssembly] Allow --strip-debug to operate on relocatable files. (#102978) [lld][WebAssembly] Ignore local symbols when parsing lazy object files. 
(#104876) [clang][bytecode] Support ObjC blocks (#104551) Revert "[mlir] NFC: fix dependence of (Tensor|Linalg|MemRef|Complex) dialects on LLVM Dialect and LLVM Core in CMake build (#104832)" [ADT] Fix a minor build error (#104840) [Driver] Default -msmall-data-limit= to 0 and clean up code [docs] Revise the doc for __builtin_allow_runtime_check [MLIR][Transforms] Fix dialect conversion inverse mapping (#104648) [scudo] Add partial chunk heuristic to retrieval algorithm. (#104807) [mlir] NFC: fix dependence of (Tensor|Linalg|MemRef|Complex) dialects on LLVM Dialect and LLVM Core in CMake build (#104832) [offload] - Fix issue with standalone debug offload build (#104647) [ValueTracking] Handle incompatible types instead of asserting in `isKnownNonEqual`; NFC [AMDGPU] Add VOPD combine dependency tests. NFC. (#104841) [compiler-rt][fuzzer] implements SetThreadName for fuchsia. (#99953) [Support] Do not ignore unterminated open { in formatv (#104688) Reapply "[HWASan] symbolize stack overflows" (#102951) (#104036) Fix StartDebuggingRequestHandler/ReplModeRequestHandler in lldb-dap (#104824) Emit `BeginSourceFile` failure with `elog`. (#104845) [libc][NFC] Add sollya script to compute worst case range reduction. (#104803) Reland "[asan] Catch `initialization-order-fiasco` in modules without…" (#104730) [NFC][asan] Create `ModuleName` lazily (#104729) [asan] Better `___asan_gen_` names (#104728) [NFC][ADT] Add range wrapper for std::mismatch (#104838) [Clang] Fix ICE in SemaOpenMP with structured binding (#104822) [MC] Remove duplicate getFixupKindInfo calls. 
NFC [C++23] Fix infinite recursion (Clang 19.x regression) (#104829) AMDGPU/NewPM: Start implementing addCodeGenPrepare (#102816) [AMDGPU][Docs] DWARF aspace-aware base types Pre-commit AMDGPU tests for masked load/store/scatter/gather (#104645) [ADT] Add a missing call to a unique_function destructor after move (#98747) [ADT] Minor code cleanup in STLExtras.h (#104808) [libc++abi] Remove unnecessary dependency on std::unique_ptr (#73277) [clang] Increase the default expression nesting limit (#104717) [mlir][spirv] Fix incorrect metadata in SPIR-V Header (#104242) [ADT] Fix alignment check in unique_function constructor (#99403) LSV: fix style after cursory reading (NFC) (#104793) Revert "[BPF] introduce `__attribute__((bpf_fastcall))` (#101228)" [NFC][asan] Don't `cd` after `split-file` (#104727) [NFC][Instrumentation] Use `Twine` in `createPrivateGlobalForString` (#104726) [mlir][spirv] Add `GroupNonUniformBallotFindLSB` and `GroupNonUniformBallotFindMSB` ops (#104791) [GlobalISel] Bail out early for big-endian (#103310) [compiler-rt][nsan] Add more tests for shadow memory (#100906) [Flang] Fix test case for AIX(big-endian) system for issuing an extra message. (#104792) [asan] Change Apple back to fixed allocator base address (#104818) [NVPTX] Add conversion intrinsics from/to fp8 types (e4m3, e5m2) (#102969) [RISCV] Improve BCLRITwoBitsMaskHigh SDNodeXForm. NFC [clang][dataflow] Collect local variables referenced within a functio… (#104459) [AMDGPU][GlobalISel] Save a copy in one case of addrspacecast (#104789) [AMDGPU] Simplify, fix and improve known bits for mbcnt (#104768) [TableGen] Detect invalid -D arguments and fail (#102813) [DirectX] Disentangle DXIL.td's op types from LLVMType. NFC [Clang] Check constraints for an explicit instantiation of a member function (#104438) [DirectX] Differentiate between 0/1 overloads in the OpBuilder. 
NFC [docs] Add note about "Re-request review" (#104735) [lld][ELF] Combine uniqued small data sections (#104485) [BPF] introduce `__attribute__((bpf_fastcall))` (#101228) [SmallPtrSet] Optimize find/erase [PowerPC] Fix codegen for transparent_union function params (#101738) [llvm-mca] Add bottle-neck analysis to JSON output. (#90056) [lldb][Python] Silence GCC warning for modules error workaround [gn build] Port a56663591573 [gn build] Port a449b857241d [clang][bytecode] Discard NullToPointer cast SubExpr (#104782) [lldb] PopulatePrpsInfoTest can fail due to hardcoded priority value (#104617) [mlir][[spirv] Add support for math.log2 and math.log10 to GLSL/OpenCL SPIRV Backends (#104608) [lldb][test] Fix GCC warnings in TestGetControlFlowKindX86.cpp [TableGen] Resolve References at top level (#104578) [LLVM] [X86] Fix integer overflows in frame layout for huge frames (#101840) [lldb][ASTUtils] Remove unused SemaSourceWithPriorities::addSource API [lldb][test] Fix cast dropping const warnin in TestBreakpointSetCallback.cpp [SimplifyCFG] Add tests for hoisting of commutative instructions (NFC) [AMDGPU][R600] Move R600CodeGenPassBuilder into R600TargetMachine(NFC). 
(#103721) Revert "[clang][ExtractAPI] Stop dropping fields of nested anonymous record types when they aren't attached to variable declaration (#104600)" MathExtras: template'ize alignToPowerOf2 (#97814) [AMDGPU] Move AMDGPUCodeGenPassBuilder into AMDGPUTargetMachine(NFC) (#103720) [clang][ExtractAPI] Stop dropping fields of nested anonymous record types when they aren't attached to variable declaration (#104600) [Clang][NFC] Fix potential null dereference in encodeTypeForFunctionPointerAuth (#104737) [DebugInfo] Make tests SimplifyCFG-independent (NFC) [mlir][ArmSME] Remove XFAILs (#104758) [RISCV] Add vector and vector crypto to SiFiveP400 scheduler model (#102155) [clang][OpenMP] Diagnose badly-formed collapsed imperfect loop nests (#60678) (#101305) Require !windows instead of XFAIL'ing ubsan/TestCases/Integer/bit-int.c [clang][bytecode] Fix member pointers to IndirectFieldDecls (#104756) [AArch64] Add fneg(fmul) and fmul(fneg) tests. NFC [clang][bytecode] Use first FieldDecl instead of asserting (#104760) [DataLayout] Refactor parsing of i/f/v/a specifications (#104699) [X86] LowerABD - simplify i32/i64 to use sub+sub+cmov instead of repeating nodes via abs (#102174) [docs] Update a filename, fix indentation (#103018) [CostModel][X86] Add cost tests for scmp/ucmp intrinsics [NFC][SLP] Remove useless code of the schedule (#104697) [VPlan] Rename getBestPlanFor -> getPlanFor (NFC). [InstCombine] Fold `(x < y) ? -1 : zext(x != y)` into `u/scmp(x,y)` (#101049) [VPlan] Emit note when UserVF > MaxUserVF (NFCI). [LLVM][NewPM] Add C API for running the pipeline on a single function. (#103773) [mlir][vector] Populate sink patterns in apply_patterns.vector.reduction_to_contract (#104754) [lld][MachO] Fix a suspicous assert in SyntheticSections.cpp [PowerPC] Support -mno-red-zone option (#94581) [PAC][ELF][AArch64] Encode several ptrauth features in PAuth core info (#102508) [VPlan] Rename getBestVF -> computeBestVF (NFC). 
[MLIR][LLVM] Improve the noalias propagation during inlining (#104750) [LoongArch] Fix the assertion for atomic store with 'ptr' type [AArch64][SME] Return false from produceCompactUnwindFrame if VG save required. (#104588) [X86] Cleanup lowerShuffleWithUNPCK/PACK signatures to match (most) other lowerShuffle* methods. NFC. [X86] VPERM2*128 instructions aren't microcoded on znver1 [X86] VPERM2*128 instructions aren't microcoded on znver2 [VPlan] Move some LoopVectorizationPlanner helpers to VPlan.cpp (NFC). [mlir][docs] Update Bytecode documentation (#99854) [SimplifyCFG] Don't block sinking for allocas if no phi created (#104579) [LoongArch] Merge base and offset for LSX/LASX memory accesses (#104452) [RISCV] Make extension names lower case in RISCVISAInfo::checkDependency() error messages. [RISCV] Add helper functions to exploit similarity of some RISCVISAInfo::checkDependency() error strings. NFC [RISCV] Merge some ISA error reporting together and make some errors more precise. [RISCV] Simplify reserse fixed regs (#104736) [RISCV] Add more tests for RISCVISAInfo::checkDependency(). NFC [Sparc] Add errata workaround pass for GR712RC and UT700 (#103843) [TableGen] Print Error and not crash on dumping non-string values (#104568) [RISCV][MC] Support experimental extensions Zvbc32e and Zvkgs (#103709) Revert "[CodeGenPrepare] Folding `urem` with loop invariant value" [SelectionDAG][X86] Preserve unpredictable metadata for conditional branches in SelectionDAG, as well as JCCs generated by X86 backend. 
(#102101) [MLIR][Python] enhance python api for tensor.empty (#103087) [AMDGPU][NFC] Fix preload-kernarg.ll test after attributor move (#98840) [CodeGenPrepare] Folding `urem` with loop invariant value [CodeGenPrepare][X86] Add tests for folding `urem` with loop invariant value; NFC [MC] Remove ELFRelocationEntry::OriginalAddend [TLI] Add support for inferring attr `cold`/`noreturn` on `std::terminate` and `__cxa_throw` [DAG][PatternMatch] Add support for matchers with flags; NFC Update Clang version from 19 to 20 in scan-build.1. [clang-format] Change GNU style language standard to LS_Latest (#104669) [MIPS] Remove expensive LLVM_DEBUG relocation dump [MC] Add test that requires multiple relaxation steps [libc][gpu] Add Atan2 Benchmarks (#104708) [libc] Add single threaded kernel attributes to AMDGPU startup utility (#104651) [HIP] search fatbin symbols for libs passed by -l (#104638) [gn build] Port 0d150db214e2 [llvm][clang] Move RewriterBuffer to ADT. (#99770) [Clang] Do not allow `[[clang::lifetimebound]]` on explicit object member functions (#96113) [clang][OpenMP] Change /* ParamName */ to /*ParamName=*/, NFC [clang-tidy] Support member functions with modernize-use-std-print/format (#104675) [clang] fix divide by zero in ComplexExprEvaluator (#104666) [clang][OpenMP] Avoid multiple calls to getCurrentDirective in DSAChecker, NFC [clang][bytecode] Only booleans can be inverted [Flang]: Use actual endianness for Integer<80> (#103928) [libc++][docs] Fixing hyperlink for mathematical special function documentation (#104444) [InstSimplify] Simplify `uadd.sat(X, Y) u>= X + Y` and `usub.sat(X, Y) u<= X, Y` (#104698) [LV] Don't cost branches and conditions to empty blocks. [clang][test] Remove bytecode interpreter RUN line from test [Clang] warn on discarded [[nodiscard]] function results after casting in C (#104677) [GlobalISel] Add and use an Opcode variable and update match-table-cxx.td checks. 
NFC [Clang] `constexpr` builtin floating point classification / comparison functions (#94118) [clang][bytecode] IntPointer::atOffset() should append (#104686) [clang][bytecode][NFC] Improve Pointer::print() [RISCV] Remove unused tablegen classes from unratified Zbp instructions. NFC [PowerPC] Use MathExtras helpers to simplify code. NFC (#104691) [clang-tidy] Correct typo in ReleaseNotes.rst (#104674) [APInt] Replace enum with static constexpr member variables. NFC [MLIR][OpenMP] Fix MLIR->LLVM value matching in privatization logic (#103718) [VE] Use SelectionDAG::getSignedConstant/getAllOnesConstant. [gn build] Port 27a62ec72aed [LSR] Split the -lsr-term-fold transformation into it's own pass (#104234) [AArch64] Use SelectionDAG::getSignedConstant/getAllOnesConstant. [ARM] Use SelectonDAG::getSignedConstant. [SelectionDAG] Use getAllOnesConstant. [LLD] [MinGW] Recognize the -rpath option (#102886) [clang][bytecode] Fix shifting negative values (#104663) [flang] Handle Hollerith in data statement initialization in big endian (#103451) [clang][bytecode] Classify 1-bit unsigned integers as bool (#104662) [RISCV][MC] Make error message of CSR with wrong extension more detailed (#104424) [X86] Don't save/restore fp around longjmp instructions (#102556) AMDGPU: Add tonearest and towardzero roundings for intrinsic llvm.fptrunc.round (#104486) [libc] Fix type signature for strlcpy and strlcat (#104643) [AArch64] Add a check for invalid default features (#104435) [clang][NFC] Clean up `Sema` headers [NFC] Cleanup in ADT and Analysis headers. (#104484) [InstCombine] Avoid infinite loop when negating phi nodes (#104581) Add non-temporal support for LLVM masked loads (#104598) [AMDGPU] Disable inline constants for pseudo scalar transcendentals (#104395) [mlir][Transforms] Dialect conversion: Fix bug in `computeNecessaryMaterializations` (#104630) [RISCV] Use getAllOnesConstant/getSignedConstant. [SelectionDAG] Use getSignedConstant/getAllOnesConstant. 
[NFC][asan] Make 'Module &M' class member [AMDGPU][NFC] Remove duplicate code by using getAddressableLocalMemorySize (#104604) [CodeGen][asan] Use `%t` instead of `cd` in test Revert "[asan] Catch `initialization-order-fiasco` in modules without globals" (#104665) [SelectionDAG][X86] Use getAllOnesConstant. NFC (#104640) [LLVM][NVPTX] Add support for brkpt instruction (#104470) [asan] Catch `initialization-order-fiasco` in modules without globals (#104621) [RISCV] Remove feature implication from Zvknhb. [clang-format] Adjust requires clause wrapping (#101550) (#102078) [MC,AArch64] Remove unneeded STT_NOTYPE/STB_LOCAL code for mapping symbols and improve tests [NFC][DXIL] move replace/erase in DXIL intrinsic expansion to caller (#104626) [flang] Allow flexible name in llvm.ident (NFC) (#104543) [SandboxIR] Implement SwitchInst (#104641) [Clang] Fix sema checks thinking kernels aren't kernels (#104460) [asan] Pre-commit test with global constructor without any global (#104620) [clang-doc] add support for enums comments in html generation (#101282) Revert "[AArch64] Fold more load.x into load.i with large offset" [NFC][cxxabi] Apply `cp-to-llvm.sh` (#101970) [Clang] fix crash by avoiding invalidation of extern main declaration during strictness checks (#104594) [Mips] Fix fast isel for i16 bswap. 
(#103398) [libc] Add missing math definitions for round and scal for GPU (#104636) [ScalarizeMaskedMemIntr] Optimize splat non-constant masks (#104537) [SandboxIR] Implement ConstantInt (#104639) [SLP]Fix PR104637: do not create new nodes for fully overlapped non-schedulable nodes [DataLayout] Refactor parsing of "p" specification (#104583) [flang][cuda] Remove run line Reland "[flang][cuda][driver] Make sure flang does not switch to cc1 (#104613)" Revert "Reland "[flang][cuda][driver] Make sure flang does not switch to cc1 (#104613)"" [SandboxIR][Tracker][NFC] GenericSetterWithIdx (#104615) Reland "[flang][cuda][driver] Make sure flang does not switch to cc1 (#104613)" [MC] Drop whitespace padding in AMDGPU combined asm/disasm tests. (#104433) [gn build] Port 7ff377ba60bf [InstrProf] Support conditional counter updates (#102542) [Analysis] Fix null ptr dereference when using WriteGraph without branch probability info (#104102) [DirectX] Revert specialized createOp methods part of #101250 [VPlan] Compute cost for most opcodes in VPWidenRecipe (NFCI). (#98764) [PowerPC] Do not merge TLS constants within PPCMergeStringPool.cpp (#94059) Revert "[flang][cuda][driver] Make sure flang does not switch to cc1" (#104632) [AArch64][MachO] Encode @AUTH to ARM64_RELOC_AUTHENTICATED_POINTER. [flang][cuda][driver] Make sure flang does not switch to cc1 (#104613) AMDGPU: Rename type helper functions in atomic handling [libc] Fix generated header definitions in cmake (#104628) [libcxx][fix] Rename incorrect filename variable [SDAG] Read-only intrinsics must have WillReturn and !Throws attributes to be treated as loads (#99999) Re-Apply "[DXIL][Analysis] Implement enough of DXILResourceAnalysis for buffers" (#104517) [SelectionDAGISel] Use getSignedConstant for OPC_EmitInteger. [DirectX] Add missing Analysis usage to DXILResourceMDWrapper [AArch64] Remove apple-a7-sysreg. 
(#102709) Revert "[libc] Disable old headergen checks unless enabled" (#104627) [LLD, MachO] Default objc_relative_method_lists on MacOS10.16+/iOS14+ (#104519) [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (#102717) [lldb][test] Mark gtest cases as XFAIL if the test suite is XFAIL (#102986) [APINotes] Support fields of C/C++ structs [Attributor] Enable `AAAddressSpace` in `OpenMPOpt` (#104363) [HLSL] Change default linkage of HLSL functions to internal (#95331) [bazel] Fix cyclic dependencies for macos (#104528) [libc] Disable old headergen checks unless enabled (#104522) [SandboxIR] Implement AtomicRMWInst (#104529) [RISCV] Move vmv.v.v peephole from SelectionDAG to RISCVVectorPeephole (#100367) [nfc] Improve testability of PGOInstrumentationGen (#104490) [test] Prevent generation of the bigendian code inside clang test CodeGen/bit-int-ubsan.c (#104607) [TableGen] Refactor Intrinsic handling in TableGen (#103980) [mlir][emitc] Add 'emitc.switch' op to the dialect (#102331) [SelectionDAG][X86] Add SelectionDAG::getSignedConstant and use it in a few places. (#104555) [mlir][AMDGPU] Implement AMDGPU DPP operation in MLIR. (#89233) [RISCV] Allow YAML file to control multilib selection (#98856) [mlir][vector] Group re-order patterns together (#102856) [lldb] Add Populate Methods for ELFLinuxPrPsInfo and ELFLinuxPrStatus (#104109) [HLSL] Flesh out basic type typedefs (#104479) [mlir][vector] Add more tests for ConvertVectorToLLVM (4/n) (#103391) [TableGen] Sign extend constants based on size for EmitIntegerMatcher. (#104550) [gn] Port AST/ByteCode #104552 [DAGCombiner] Remove TRUNCATE_(S/U)SAT_(S/U) from an assert that isn't tested. NFC (#104466) [RISCV] Don't support TRUNCATE_SSAT_U. 
(#104468) [Hexagon] Use range-based for loops (NFC) (#104538) [CodeGen] Use range-based for loops (NFC) (#104536) [Bazel] Port AST/ByteCode #104552 [mlir][linalg] Implement TilingInterface for winograd operators (#96184) [libc++][math] Fix acceptance of convertible types in `std::isnan()` and `std::isinf()` (#98952) [clang] Rename all AST/Interp stuff to AST/ByteCode (#104552) [mlir] [tosa] Bug fixes in shape inference pass (#104146) [libc++] Fix rejects-valid in std::span copy construction (#104500) [InstCombine] Handle commuted variant of sqrt transform [InstCombine] Thwart complexity-based canonicalization in sqrt test (NFC) [InstCombine] Preserve nsw in A + -B fold [InstCombine] Add nsw tests for A + -B fold (NFC) [include-cleaner] fix 32-bit buildbots after a426ffdee1ca7814f2684b6 [PhaseOrdering] Regenerate test checks (NFC) [InstCombine] Regenerate test checks (NFC) [X86] Fold extract_subvector(int_to_fp(x)) vXi32/vXf32 cases to match existing fp_to_int folds [InstCombine] Regenerate test checks (NFC) [mlir][spirv] Update documentation. NFC (#104584) [GlobalIsel] Revisit ext of ext. (#102769) [libc++] Fix backslash as root dir breaks lexically_relative, lexically_proximate and hash_value on Windows (#99780) [AArch64][GlobalISel] Disable fixed-point iteration in all Combiners [SLP][REVEC] Fix CreateInsertElement does not use the correct result if MinBWs applied. 
(#104558) Add FPMR register and update dependencies of FP8 instructions (#102910) [InstCombine] Fix incorrect zero ext in select of lshr/ashr fold [InstCombine] Add i128 test for select of lshr/ashr transform (NFC) [llvm-c] Add non-cstring versions of LLVMGetNamedFunction and LLVMGetNamedGlobal (#103396) [InstCombine] Fold an unsigned icmp of ucmp/scmp with a constant to an icmp of the original arguments (#104471) [clang][Interp] Fix classifying enum types (#104582) [clang] Add a new test for CWG2091 (#104573) [mlir][ArmSME][docs] Fix broken link (NFC) [compiler-rt] Stop using x86 builtin on AArch64 with GCC (#93890) [DataLayout] Refactor parsing of "ni" specification (#104546) [X86] SimplifyDemandedVectorEltsForTargetNode - reduce width of X86 conversions nodes when upper elements are not demanded. (#102882) [include-cleaner] Add handling for new/delete expressions (#104033) InferAddressSpaces: Convert test to generated checks [LAA] Use computeConstantDifference() (#103725) [SimplifyCFG] Add test for #104567 (NFC) [bazel] Port for 75cb9edf09fdc091e5bc0f3d46a96c2877735a39 [AMDGPU][NFC] AMDGPUUsage.rst: document corefile format (#104419) [lldb][NFC] Moved FindSchemeByProtocol() from Acceptor to Socket (#104439) [X86] lowerShuffleAsDecomposedShuffleMerge - don't lower to unpack+permute if either source is zero. [X86] Add shuffle tests for #104482 [clang][Interp][NFC] Remove Function::Loc [clang][NFC] Update `cxx_dr_status.html` [MLIR][GPU-LLVM] Add GPU to LLVM-SPV address space mapping (#102621) [DAG] SD Pattern Match: Operands patterns with VP Context (#103308) Revert "[clang][driver] Fix -print-target-triple OS version for apple targets" (#104563) [NFC][X86] Refactor: merge avx512_binop_all2 into avx512_binop_all (#104561) [RISCV] Merge bitrotate crash test into shuffle reverse tests. NFC [Passes] clang-format initialization files (NFC) [mlir][IR] Fix `checkFoldResult` error message (#104559) [RISCV] Merge shuffle reverse tests. 
NFC [RISCV] Use shufflevector in shuffle reverse tests. NFC [RISCV] Remove -riscv-v-vector-bits-max from reverse tests. NFC [flang][stack-arrays] Collect analysis results for OMP ws loops (#103590) [clang][Interp] Add scopes to conditional operator subexpressions (#104418) [RISCV] Simplify (srl (and X, Mask), Const) to TH_EXTU (#102802) [RISCV][NFC] Fix typo: "wererenamed" to "were renamed" (#104530) [RISCV] Lower fixed reverse vector_shuffles through vector_reverse (#104461) [asan] Fix build breakage from report_globals change [MLIR][test] Run SVE and SME Integration tests using qemu-aarch64 (#101568) [DAGCombiner] Don't let scalarizeBinOpOfSplats create illegal scalar MULHS/MULHU (#104518) [flang][cuda] Add version in libCufRuntime name (#104506) [mlir][tosa] Add missing check for new_shape of `tosa.reshape` (#104394) [Bitcode] Use range-based for loops (NFC) (#104534) [HLSL] update default validator version to 1.8. (#104040) [ScalarizeMaskedMemIntr] Pre-commit tests for splat optimizations (#104527) [Sparc] Remove dead code (NFC) (#104264) [Clang] [Sema] Error on reference types inside a union with msvc 1900+ (#102851) [Driver] Reject -Wa,-mrelax-relocations= for non-ELF [Analysis] Use a range-based for loop (NFC) (#104445) [llvm] Use llvm::any_of (NFC) (#104443) [PowerPC] Use range-based for loops (NFC) (#104410) [CodeGen] Use a range-based for loop (NFC) (#104408) [ORC] Gate testcase for 3e1d4ec671c on x86-64 and aarch64 target support. [builitins] Only try to use getauxval on Linux (#104047) [ORC] Add missing dependence on BinaryFormat library. 
[flang] Inline minval/maxval over elemental/designate (#103503) [Driver] Correctly handle -Wa,--crel -Wa,--no-crel [lldb] Correctly fix a usage of `PATH_MAX`, and fix unit tests (#104502) [gn build] Port 3e1d4ec671c5 [asan] Remove debug tracing from `report_globals` (#104404) [workflows] Add a new workflow for checking commit access qualifications (#93301) [Driver] Improve error message for -Wa,-x=unknown [SandboxIR] Implement UnaryOperator (#104509) [ORC] loadRelocatableObject: universal binary support, clearer errors (#104406) [RISCV] Use significant bits helpers in narrowing of build vectors [nfc] (#104511) [LLDB] Reapply #100443 SBSaveCore Thread list (#104497) [Driver] Reject -Wa,-mrelax-relocations= for non-x86 [docs] Stress out the branch naming scheme for Graphite. (#104499) [NFC][sanitizer] Use `UNLIKELY` in VReport/VPrintf (#104403) [asan] Reduce priority of "contiguous_container:" VPrintf (#104402) [libc] Make sure we have RISC-V f or d extension before using it (#104476) [Driver] Make CodeGenOptions name match MCTargetOptions names [Attributor][FIX] Ensure we do not use stale references (#104495) [libclang/python] Expose `clang_isBeforeInTranslationUnit` for `SourceRange.__contains__` [Clang] Add target triple to fix failing test (#104513) [clang][NFC] Fix table of contents in `Sema.h` [-Wunsafe-buffer-usage] Fix warning after #102953 [flang] Make sure range is valid (#104281) [MC] Replace hasAltEntry() with isMachO() MCAsmInfo: Replace some Mach-O specific check with isMachO(). NFC [asan] De-prioritize VReport `DTLS_Find` (#104401) Revert "[DXIL][Analysis] Implement enough of DXILResourceAnalysis for buffers" (#104504) [ubsan] Limit _BitInt ubsan tests to x86-64 platform only (#104494) Update load intrinsic attributes (#101562) [MC] Replace HasAggressiveSymbolFolding with SetDirectiveSuppressesReloc. NFC [SandboxIR] Implement BinaryOperator (#104121) [RISCV][GISel] Support nxv16p0 for RV32. 
(#101573) [nfc][ctx_prof] Remove the need for `PassBuilder` to know about `UseCtxProfile` (#104492) [Clang] [NFC] Rewrite constexpr vectors test to use element access (#102757) (lldb) Fix PATH_MAX for Windows (#104493) [libc] Add definition for `atan2l` on 64-bit long double platforms (#104489) Revert "[sanitizer] Remove GetCurrentThread nullness checks from Allocate" Reapply "Fix prctl to handle PR_GET_PDEATHSIG. (#101749)" (#104469) [-Wunsafe-buffer-usage] Fix a small bug recently found (#102953) [TargetLowering] Don't call SelectionDAG::getTargetLoweringInfo() from TargetLowering methods. NFC (#104197) [PowerPC][GlobalMerge] Enable GlobalMerge by default on AIX (#101226) [Clang] Implement C++26’s P2893R3 ‘Variadic friends’ (#101448) clang/AMDGPU: Emit atomicrmw for __builtin_amdgcn_global_atomic_fadd_{f32|f64} (#96872) [llvm-objdump] Fix a warning [bazel] Port 47721d46187f89c12a13d07b5857496301cf5d6e (#104481) [libc++] Remove the allocator<const T> extension (#102655) [Clang] handle both gnu and cpp11 attributes to ensure correct parsing inside extern block (#102864) [gn build] Port 47721d46187f [lldb] Realpath symlinks for breakpoints (#102223) llvm-objdump: ensure a MachO symbol isn't STAB before looking up secion (#86667) [test]Fix test error due to CRT dependency (#104462) [clang][Interp] Call move function for certain primitive types (#104437) [llvm-objdump] Print out xcoff file header for xcoff object file with option private-headers (#96350) [Clang] prevent null explicit object argument from being deduced (#104328) Revert "[Clang] Overflow Pattern Exclusions (#100272)" [flang][OpenMP] Fix 2 more regressions after #101009 (#101538) [InstCombine] Fold `ucmp/scmp(x, y) >> N` to `zext/sext(x < y)` when N is one less than the width of the result of `ucmp/scmp` (#104009) [bazel] Enable more lit self tests (#104285) Fix single thread stepping timeout race condition (#104195) [SPARC][Utilities] Add names for SPARC ELF flags in LLVM binary utilities (#102843) 
[SPARC][Driver] Add -m(no-)v8plus flags handling (#98713) [OpenMP] Add support for pause with omp_pause_stop_tool (#97100) Revert "[SLP][NFC]Remove unused using declarations, reduce mem usage in containers, NFC" [ValueTracking] Fix f16 fptosi range for large integers [InstSimplify] Add tests for f16 to i128 range (NFC) Revert "[Object][x86-64] Add support for `R_X86_64_GLOB_DAT` relocations. (#103029)" (#103497) [NFC] Fix spelling of "definitely". (#104455) [InstCombine][NFC] Add tests for shifts of constants by common factor (#103471) [OpenMP] Miscellaneous small code improvements (#95603) [clang][ExtractAPI] Emit environment component of target triple in SGF (#103273) [RISCV] Narrow indices to e16 for LMUL > 1 when lowering vector_reverse (#104427) [NFC] Fix code line exceeding 80 columns (#104428) [SLP][NFC]Remove unused using declarations, reduce mem usage in containers, NFC [Clang] Check explicit object parameter for defaulted operators properly (#100419) [LegalizeTypes][AMDGPU]: Allow for scalarization of insert_subvector (#104236) Allow optimization of __size_returning_new variants. (#102258) [SLP]Fix PR104422: Wrong value truncation [GlobalISel] Combiner: Fix warning after #102163 [SLP][NFC]Add a test with incorrect minbitwidth analysis for reduced operands [ubsan] Display correct runtime messages for negative _BitInt (#96240) Revert "[SLP][NFC]Remove unused using declarations, reduce mem usage in containers, NFC" [DataLayout] Extract loop body into a function to reduce nesting (NFC) (#104420) [clang][ExtractAPI] Compute inherited availability information (#103040) [CodeGen] Fix -Wcovered-switch-default in Combiner.cpp (NFC) [CompilerRT][Tests] Fix profile/darwin-proof-of-concept.c (#104237) [mlir][gpu] Fix typo in test filename (#104053) [LoongArch] Pre-commit tests for validating the merge base offset in vecotrs. 
NFC [AArch64] optimise SVE prefetch intrinsics with no active lanes (#103052) [AMDGPU] MCExpr printing helper with KnownBits support (#95951) [GlobalISel] Combiner: Observer-based DCE and retrying of combines [libcxx] Use `aligned_alloc` for testing instead of `posix_memalign` (#101748) [VPlan] Run VPlan optimizations on plans in native path. [clang][Interp] Use first field decl for Record field lookup (#104412) InferAddressSpaces: Restore non-instruction user check [AMDGPU][llvm-split] Fix another division by zero (#104421) Reapply "[lldb] Tolerate multiple compile units with the same DWO ID (#100577)" (#104041) [lldb-dap] Expose log path in extension settings (#103482) [clang][Interp] Pass callee decl to null_callee diagnostics (#104426) [llvm][CodeGen] Resolve issues when updating live intervals in window scheduler (#101945) [DataLayout] Add helper predicates to sort specifications (NFC) (#104417) InferAddressSpaces: Make getPredicatedAddrSpace less confusing (#104052) [AArch64] Fold more load.x into load.i with large offset [AArch64] merge index address with large offset into base address [AArch64] Add verification for MemOp immediate ranges (#97561) Revert "[Clang] [AST] Fix placeholder return type name mangling for MSVC 1920+ / VS2019+ (#102848)" [analyzer] Do not reason about locations passed as inline asm input (#103714) [NFC][mlir][scf] Fix misspelling of replace (#101683) Revert "Remove empty line." [mlir][Transforms] Dialect conversion: Build unresolved materialization for replaced ops (#101514) Remove empty line. 
[DirectX] Use a more consistent pass name for DXILTranslateMetadata [Flang][OpenMP] Move assert for wrapper syms and block args to genLoopNestOp (#103731) [clang][driver] Fix -print-target-triple OS version for apple targets (#104037) [bazel] Port for 141536544f4ec1d1bf24256157f4ff1a3bc07dae [DAG] Adding m_FPToUI and m_FPToSI to SDPatternMatch.h (#104044) [llvm][Docs] `_or_null` -> `_if_present` in Programmer's Manual (#98586) [MLIR][LLVM]: Add an IR utility to perform slice walking (#103053) [lldb][test] Mark sys_info zdump test unsupported on 32 bit Arm Linux [flang][test] Run Driver/fveclib-codegen.f90 for aarch64 and x86_64 (#103730) [lldb] Remove Phabricator usernames from Code Owners file (#102590) [DataLayout] Move '*AlignElem' structs and enum inside DataLayout (NFC) (#103723) [flang][test] Fix Lower/default-initialization-globals.f90 on SPARC (#103722) [mlir][test] XFAIL little-endian-only tests on SPARC (#103726) [UnitTests] Convert some data layout parsing tests to GTest (#104346) Fix warnings in #102848 [-Wunused-but-set-variable] [VPlan] Move VPWidenStoreRecipe::execute to VPlanRecipes.cpp (NFC). [include-cleaner] Remove two commented-out lines of code. [mlir][tosa] Add verifier for `tosa.table` (#103708) [X86][MC] Remove CMPCCXADD's CondCode flavor. (#103898) [ctx_prof] Remove an unneeded include in CtxProfAnalysis.cpp Intrinsic: introduce minimumnum and maximumnum for IR and SelectionDAG (#96649) Remove failing test until it can be fixed properly. [Clang][NFC] Move FindCountedByField into FieldDecl (#104235) Fix testcases. Use -emit-llvm and not -S. Use LABEL checking. 
[Clang] [AST] Fix placeholder return type name mangling for MSVC 1920+ / VS2019+ (#102848) [LLDB][OSX] Removed semi colon generating a warning during build (#104398) [OpenMP] Use range-based for loops (NFC) (#103511) [RISCV] Implement RISCVTTIImpl::shouldConsiderAddressTypePromotion for RISCV (#102560) [lld-macho] Fix crash: ObjC category merge + relative method lists (#104081) [ELF][NFC] Allow non-GotSection for addAddendOnlyRelocIfNonPreemptible (#104228) [ctx_prof] CtxProfAnalysis: populate module data (#102930) [sanitizer] Remove GetCurrentThread nullness checks from Allocate Remove '-emit-llvm' and use '-triple' Use clang_cc1 and specify the target explicitly. utils/git: Add linkify script. [mlir][MemRef] Add more ops to narrow type support, strided metadata expansion (#102228) [Clang] Overflow Pattern Exclusions (#100272) [Clang] Error on extraneous template headers by default. (#104046) [Sanitizers] Disable prctl test on Android. [RISCV] Don't combine (sext_inreg (fmv_x_anyexth X), i16) with Zhinx. Remove unused variable, and unneeded extract element instruction (#103489) [bazel] Port 4bac8fd8904904bc7d502f39851eef50b5afff73 (#104278) Reland "[flang][cuda] Use cuda runtime API #103488" [Clang] Add `__CLANG_GPU_DISABLE_MATH_WRAPPERS` macro for offloading math (#98234) [llvm-lit] Fix Unhashable TypeError when using lit's internal shell (#101590) [llvm-lit][test][NFC] Moved cat command tests into separate lit test file (#102366) [RISCV] Add signext attribute to return of fmv_x_w test in float-convert.ll. NFC [DXIL][Analysis] Implement enough of DXILResourceAnalysis for buffers Reapply "[Attributor][AMDGPU] Enable AAIndirectCallInfo for AMDAttributor (#100952)" [DXIL][Analysis] Boilerplate for DXILResourceAnalysis pass [mlir] Add bubbling patterns for non intersecting reshapes (#103401) Revert "[flang][cuda] Use cuda runtime API" (#104232) [libc++] Remove non-existent LWG issue from the .csv files [RISCV][GISel] Remove support for s32 G_VAARG on RV64. 
(#102533) [NVPTX] Add idp2a, idp4a intrinsics (#102763) [X86] Check if an invoked function clobbers fp or bp (#103446) [flang][cuda] Use cuda runtime API (#103488) [SLP][NFC]Remove unused using declarations, reduce mem usage in containers, NFC [TargetLowering] Remove unncessary null check. NFC [OpenMP] Fix buildbot failing on allocator test [clang] Turn -Wenum-constexpr-conversion into a hard error (#102364) [libcxx] Adjust inline assembly constraints for the AMDGPU target (#101747) [lld-macho] Make relative method lists work on x86-64 (#103905) [libcxx] Disable invalid `__start/__stop` reference on NVPTX (#99381) [libcxx] Add fallback to standard C when `unistd` is unavailable (#102005) [Clang] Fix 'nvlink-wrapper' not ignoring `-plugin` like lld does (#104056) [OpenMP] Implement 'omp_alloc' on the device (#102526) [vscode-mlir] Added per-LSP-server executable arguments (#79671) [flang] Read the extra field from the in box when doing reboxing (#102992) [HLSL] Split out the ROV attribute from the resource attribute, make it a new spellable attribute. (#102414) [libc++] Fix ambiguous constructors for std::complex and std::optional (#103409) AMDGPU: Avoid manually reconstructing atomicrmw (#103769) [libc] Fix 'float type' incorrectly being used as the return type [Clang] Adjust concept definition locus (#103867) [SandboxIR] Implement Instruction flags (#103343) [AArch64] Add some uxtw peephole tests. NFC AMDGPU: Stop promoting allocas with addrspacecast users (#104051) [NVPTX] Fix typo causing GCC warning (#103045) [attributes][-Wunsafe-buffer-usage] Support adding unsafe_buffer_usage attribute to struct fields (#101585) [RISCV][GISel] Support G_SEXT_INREG for Zbb. 
(#102682) [SystemZ][z/OS] Continuation of __ptr32 support (#103393) [X86] concat(permv3(x0,m0,y0),permv3(x0,m1,y0)) -> permv3(concat(x0,u),m3,concat(y0,u)) [X86] Add test coverage for #103564 [X86] combineEXTRACT_SUBVECTOR - treat oneuse extractions from loads as free [libcxx] Set `_LIBCPP_HAS_CLOCK_GETTIME` for GPU targets (#99243) Fix bazel build (#104054) CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes (#102815) AMDGPU/NewPM: Fill out addPreISelPasses (#102814) [libc++] Add mechanical update to CxxPapers.rst to git-blame-ignore-revs [libc++] Mechanical adjustments for the C++14 Paper status files [LLDB][OSX] Add a fallback support exe directory (#103458) [TextAPI] Use range-based for loops (NFC) (#103530) [mlir][vector] Add tests for `populateSinkVectorBroadcastPatterns` (1/n) (#102286) [libc++] Remove duplicate C++17 LWG issues from the CSVs [clang] Implement `__builtin_is_implicit_lifetime()` (#101807) Fix prctl test to execute all test cases if the first condition fails. (#102987) Revert "[scudo] Separated committed and decommitted entries." (#104045) [SelectionDAG] Scalarize binary ops of splats be…

Update codegen for function parameters that have the transparent_union attribute so that it matches the codegen for the first union member. This is a follow-up to #101738 that fixes non-PPC codegen and closes #76773.

Update codegen for function parameters that have the transparent_union attribute so that it matches the codegen for the first union member. This is a follow-up to llvm#101738 that fixes non-PPC codegen and closes llvm#76773.

commit 56905dab7da50bccfcceaeb496b206ff476127e1 Author: JinjinLi868 <lijinjin.868@bytedance.com> Date: Tue Sep 10 10:47:33 2024 +0800 [clang] fix half && bfloat16 convert node expr codegen (#89051) Data type conversion between fp16 and bf16 will generate fptrunc and fpextend nodes, but they are actually bitcast nodes. commit ffcff4af59712792712b33648f8ea148b299c364 Author: Yingwei Zheng <dtcxzyw2333@gmail.com> Date: Tue Sep 10 10:38:21 2024 +0800 [ValueTracking] Infer is-power-of-2 from assumptions. (#107745) This patch tries to infer is-power-of-2 from assumptions. I don't see that this kind of assumption exists in my dataset. Related issue: https://github.com/rust-lang/rust/issues/129795 Close https://github.com/llvm/llvm-project/issues/58996. commit eb0e4b1415800e34b86319ce1d57ad074d5ca202 Author: Petr Hosek <phosek@google.com> Date: Mon Sep 9 19:21:59 2024 -0700 [Fuzzer] Passthrough zlib CMake paths into the test (#107926) We shouldn't assume that we're using system zlib installation. commit 761bf333e378b52614cf36cd5db2837d5e4e0ae4 Author: Yuxuan Chen <ych@fb.com> Date: Mon Sep 9 18:57:39 2024 -0700 [LLVM][Coroutines] Switch CoroAnnotationElidePass to a FunctionPass (#107897) After landing https://github.com/llvm/llvm-project/pull/99285 we found that the call graph update was causing the following crash when expensive checks are turned on ``` llvm-project/llvm/lib/Analysis/CGSCCPassManager.cpp:982: LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass(LazyCallGraph &, LazyCallGraph::SCC &, LazyCallGraph::Node &, CGSCCAnalysisManager &, CGSCCUpdateResult &, FunctionAnalysisManager &, bool): Assertion `(RC == &TargetRC || RC->isAncestorOf(Targe tRC)) && "New call edge is not trivial!"' failed. ``` I have to admit I believe that the call graph update process I did for that patch could be wrong. 
After reading the code in `CGSCCToFunctionPassAdaptor`, I am convinced that `CoroAnnotationElidePass` can be a FunctionPass and rely on the adaptor to update the call graph for us, so long as we properly invalidate the caller's analyses. After this patch, `llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll` no longer fails under expensive checks. commit 7a8e9dfe5cc6f049f918e528ef476d9e7aada8a5 Author: Jordan Rupprecht <rupprecht@google.com> Date: Mon Sep 9 20:34:43 2024 -0500 [bazel][libc][NFC] Add missing layering deps (#107947) After 277371943fa48f2550df02870951f5e5a77efef5 e.g. ``` external/llvm-project/libc/test/src/math/smoke/NextTowardTest.h:12:10: error: module llvm-project//libc/test/src/math/smoke:nexttowardf_test does not depend on a module exporting 'src/__support/CPP/bit.h' ``` commit 1ca411ca451e0e86caf9207779616f32ed9fd908 Author: wanglei <wanglei@loongson.cn> Date: Tue Sep 10 09:28:15 2024 +0800 [LoongArch] Codegen for concat_vectors with LASX Fixes: #107355 Reviewed By: SixWeining Pull Request: https://github.com/llvm/llvm-project/pull/107523 commit e64a1c00c1d612dccd976c06fdac85afa3b06fbe Author: Mircea Trofin <mtrofin@google.com> Date: Mon Sep 9 18:25:50 2024 -0700 Fix unintended extra commit in PR #107499 commit f7479b5ff43261a20258743da5fa583a0c729564 Author: Rahul Joshi <rjoshi@nvidia.com> Date: Mon Sep 9 18:24:07 2024 -0700 [NFC][TableGen] Simplify DirectiveEmitter using range for loops (#107909) Make constructors that take const Record * implicit, allowing us to simplify some range based loops to use that class instance as the loop variable. Change remaining constructor calls to use () instead of {} to construct objects. 
commit a111f9119a5ec77c19a514ec09454218f739454f Author: Yingwei Zheng <dtcxzyw2333@gmail.com> Date: Tue Sep 10 09:19:39 2024 +0800 [LoongArch][ISel] Check the number of sign bits in `PatGprGpr_32` (#107432) After https://github.com/llvm/llvm-project/pull/92205, LoongArch ISel selects `div.w` for `trunc i64 (sdiv i64 3202030857, (sext i32 X to i64)) to i32`. It is incorrect since `3202030857` is not a signed 32-bit constant. It will produce wrong result when `X == 2`: https://alive2.llvm.org/ce/z/pzfGZZ This patch adds additional `sexti32` checks to operands of `PatGprGpr_32`. Alive2 proof: https://alive2.llvm.org/ce/z/AkH5Mp Fix #107414. commit f3b4e47b34e59625e2c8420ce8bf789373177d6d Author: Longsheng Mou <longshengmou@gmail.com> Date: Tue Sep 10 09:19:22 2024 +0800 [mlir][linalg][NFC] Drop redundant rankReductionStrategy (#107875) This patch drop redundant rankReductionStrategy in `populateFoldUnitExtentDimsViaSlicesPatterns` and fixes comment typos. commit 3b2261809471a018de50e745c0d475b048c66fd4 Author: Mircea Trofin <mtrofin@google.com> Date: Mon Sep 9 18:16:24 2024 -0700 [ctx_prof] Insert the ctx prof flattener after the module inliner (#107499) This patch enables experimenting with the contextual profile. ICP is currently disabled in this case - will reenable it subsequently. Also subsequently the inline cost model / decision making would be updated to be context-aware. Right now, this just achieves "complete use" of the profile, in that it's ingested, maintained, and sunk to a flat profile when not needed anymore. Issue [#89287](https://github.com/llvm/llvm-project/issues/89287) commit b0d2411b53a0b55baf6d6dc7986d285ce59807fa Author: Alex MacLean <amaclean@nvidia.com> Date: Mon Sep 9 17:37:09 2024 -0700 [NVPTX] Support copysign PTX instruction (#107800) Lower `fcopysign` SDNodes into `copysign` PTX instructions where possible. See [PTX ISA: 9.7.3.2. 
Floating Point Instructions: copysign] (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-copysign). commit 81ef8e2fdbdfac4e186e12a874242b294d05d4e0 Author: Vitaly Buka <vitalybuka@google.com> Date: Mon Sep 9 17:00:06 2024 -0700 [NFC][sanitizer] Extract GetDTLSRange (#107934) commit ae02211eaef305f957b419e5c39499aa472b956e Author: vporpo <vporpodas@google.com> Date: Mon Sep 9 16:52:54 2024 -0700 [SandboxIR] Implement UndefValue (#107628) This patch implements sandboxir::UndefValue mirroring llvm::UndefValue. commit 33c1325a73c4bf6bacdb865c2550038afe4377d2 Author: Anton Korobeynikov <anton@korobeynikov.info> Date: Mon Sep 9 16:34:41 2024 -0700 [PAC] Make __is_function_overridden pauth-aware on ELF platforms (#107498) Apparently, there are two almost identical implementations: one for MachO and another one for ELF. The ELF bits somehow slipped while https://github.com/llvm/llvm-project/pull/84573 was reviewed. The particular implementation is identical to MachO case. commit 88bd507dc2dd9c235b54d718cf84e4ef80d94bc9 Author: Noah Goldstein <goldstein.w.n@gmail.com> Date: Mon Sep 9 11:07:38 2024 -0700 [X86] Handle shifts + and in `LowerSELECTWithCmpZero` shifts are the same as sub where rhs == 0 is identity. and is the inverted case where: `SELECT (AND(X,1) == 0), (AND Y, Z), Y` -> `(AND Y, (OR NEG(AND(X, 1)), Z))` With -1 as the identity. Closes #107910 commit d148a1a40461ed27863f4b17ac2bd5914499f413 Author: Noah Goldstein <goldstein.w.n@gmail.com> Date: Mon Sep 9 11:07:36 2024 -0700 [X86] Add tests support shifts + and in `LowerSELECTWithCmpZero`; NFC commit 26b786ae2f15bfbf6f0925856a788ae0bfb2f8c1 Author: Artem Belevich <tra@google.com> Date: Mon Sep 9 16:15:00 2024 -0700 [NVPTX] Restrict combining to properly aligned v16i8 vectors. (#107919) Fixes generation of invalid loads leading to misaligned access errors. The bug got exposed by SLP vectorizer change ec360d6 which allowed SLP to produce `v16i8` vectors. 
Also updated the tests to use automatic check generator. commit f12e10b513686a12f20f0c897dcc9ffc00cbce09 Author: vporpo <vporpodas@google.com> Date: Mon Sep 9 15:41:30 2024 -0700 [SandboxVec] Implement Pass class (#107617) This patch implements the Pass base class and the FunctionPass sub-class that operate on Sandbox IR. commit bdf02249e7f8f95177ff58c881caf219699acb98 Author: Rahul Joshi <rjoshi@nvidia.com> Date: Mon Sep 9 14:33:21 2024 -0700 [TableGen] Change CGIOperandList::OperandInfo::Rec to const pointer (#107858) Change CGIOperandList::OperandInfo::Rec and CGIOperandList::TheDef to const pointer. This is a part of effort to have better const correctness in TableGen backends: https://discourse.llvm.org/t/psa-planned-changes-to-tablegen-getallderiveddefinitions-api-potential-downstream-breakages/81089 commit a9a5a18a0e99b0251c0fe6ce61c5e699bf6b379b Author: Tim Gymnich <tgymnich@icloud.com> Date: Mon Sep 9 23:27:27 2024 +0200 [SPIRV] Add sign intrinsic part 1 (#101987) partially fixes #70078 - Added `int_spv_sign` intrinsic in `IntrinsicsSPIRV.td` - Added lowering and map to `int_spv_sign in `SPIRVInstructionSelector.cpp` - Added SPIR-V backend test case in `llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sign.ll` - https://github.com/llvm/llvm-project/pull/101988 - https://github.com/llvm/llvm-project/pull/101989 commit 66e9078f827383f77c1c239f6c09f2b07a963649 Author: Steven Wu <stevenwu@apple.com> Date: Mon Sep 9 14:12:12 2024 -0700 [LTO] Fix a use-after-free in legacy LTO C APIs (#107896) Fix a bug that `lto_runtime_lib_symbols_list` is returning the address of a local variable that will be freed when getting out of scope. This is a regression from #98512 that rewrites the runtime libcall function lists into a SmallVector. 
rdar://135559037 commit d9a996020394a8181d17e4f0a0fc89d59371f9af Author: ChiaHungDuan <chiahungduan@google.com> Date: Mon Sep 9 13:59:03 2024 -0700 [scudo] Add fragmentation info for each memory group (#107475) This information helps with tuning the heuristic of selecting memory groups to release the unused pages. commit 6f8d2781f604cfcf9ea6facecc0bea8e4d682e1e Author: Sterling-Augustine <56981066+Sterling-Augustine@users.noreply.github.com> Date: Mon Sep 9 20:49:49 2024 +0000 [SandboxIR] Add missing VectorType functions (#107650) Fills in many missing functions from VectorType commit 53a81d4d26f0409de8a0655d7af90f2bea222a12 Author: Charlie Barto <chbarto@microsoft.com> Date: Mon Sep 9 13:41:08 2024 -0700 Reland [asan][windows] Eliminate the static asan runtime on windows (#107899) This reapplies 8fa66c6ca7272268747835a0e86805307b62399c ([asan][windows] Eliminate the static asan runtime on windows) for a second time. That PR bounced off the tests because it caused failures in the other sanitizer runtimes, these have been fixed by only building interception, sanitizer_common, and asan with /MD, and continuing to build the rest of the runtimes with /MT. This does mean that any usage of the static ubsan/fuzzer/etc runtimes will mean you're mixing different runtime library linkages in the same app, the interception, sanitizer_common, and asan runtimes are designed for this, however it does result in some linker warnings. Additionally, it turns out when building in release-mode with LLVM_ENABLE_PDBs the build system forced /OPT:ICF. This totally breaks asan's "new" method of doing "weak" functions on windows, and so /OPT:NOICF was explicitly added to asan's link flags. --------- Co-authored-by: Amy Wishnousky <amyw@microsoft.com> commit 34034381b7d54da864f8794f578d9c501d6d4f3b Author: Florian Hahn <flo@fhahn.com> Date: Mon Sep 9 21:35:59 2024 +0100 [VPlan] Consistently use VTC for vector trip count in vplan-printing.ll. 
The inconsistency surfaced in https://github.com/llvm/llvm-project/pull/95305. Split off the reduce the diff. commit 3f22756f391e20040fa3581206b77c409433bd9f Author: Justin Bogner <mail@justinbogner.com> Date: Mon Sep 9 13:21:22 2024 -0700 [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops The `@llvm.dx.typedBufferLoad` intrinsic is lowered to `@dx.op.bufferLoad`. There's some complexity here in translating to scalarized IR, which I've abstracted out into a function that should be useful for samples, gathers, and CBuffer loads. I've also updated the DXILResources.rst docs to match what I'm doing here and the proposal in llvm/wg-hlsl#59. I've removed the content about stores and raw buffers for now with the expectation that it will be added along with the work. Note that this change includes a bit of a hack in how it deals with `getOverloadKind` for the `dx.ResRet` types - we need to adjust how we deal with operation overloads to generate a table directly rather than proxy through the OverloadKind enum, but that's left for a later change here. Part of #91367 Pull Request: https://github.com/llvm/llvm-project/pull/104252 commit 985600dcd3fcef4095097bea5b556e84c8143a7f Author: Rahul Joshi <rjoshi@nvidia.com> Date: Mon Sep 9 13:09:53 2024 -0700 [TableGen] Migrate CodeGenHWModes to use const RecordKeeper (#107851) Migrate CodeGenHWModes to use const RecordKeeper and const Record pointers. 
This is a part of effort to have better const correctness in TableGen backends: https://discourse.llvm.org/t/psa-planned-changes-to-tablegen-getallderiveddefinitions-api-potential-downstream-breakages/81089 commit b3d2d5039b9b8aa10a86c593387f200b15c02aef Author: Alexey Bataev <a.bataev@outlook.com> Date: Mon Sep 9 12:32:45 2024 -0700 [SLP][NFC]Reorder code for better structural complexity, NFC commit e62bf7cd0beb530bc0842bb7aa8ff162607a82b9 Author: Sean Perry <perry@ca.ibm.com> Date: Mon Sep 9 15:24:16 2024 -0400 [z/OS] Set the default arch for z/OS to be arch10 (#89854) The default arch level on z/OS is arch10. Update the code so z/OS has arch10 without changing the default for zLinux. commit 98815f7878c3240e27f516e331255532087f5fcb Author: c8ef <c8ef@outlook.com> Date: Tue Sep 10 03:13:29 2024 +0800 [clang][docs] Add clang-tutor to External Clang Examples (#107665) commit 3681d8552fb9e6cb15e9d45849ff2e34a25c518e Author: Nikita Popov <nikita.ppv@gmail.com> Date: Mon Sep 9 21:10:12 2024 +0200 Revert "[Clang][Sema] Use the correct lookup context when building overloaded 'operator->' in the current instantiation (#104458)" This reverts commit 3cdb30ebbc18fa894d3bd67aebcff76ce7c741ac. Breaks clang bootstrap. 
commit ab82f83dae065a9aa4716618524eddf4aad5fcf0 Author: Mingming Liu <mingmingl@google.com> Date: Mon Sep 9 11:53:07 2024 -0700 [LTO][NFC] Fix forward declaration (#107902) Fix after https://github.com/llvm/llvm-project/pull/107792 commit 6776d65ceaea84fe815845da3c41b2f1621521fb Author: NoumanAmir-10xe <66777536+NoumanAmir657@users.noreply.github.com> Date: Mon Sep 9 23:49:22 2024 +0500 [libc++] Implement LWG3953 (#107535) Closes #105303 commit eec1ee8ef10820c61c03b00b68d242d8c87d478a Author: Abhina Sree <Abhina.Sreeskantharajan@ibm.com> Date: Mon Sep 9 14:37:53 2024 -0400 [SystemZ][z/OS] Enable lit testing for z/OS (#107631) This patch fixes various errors to enable llvm-lit to run on z/OS commit 78c1009c3e54e59b6177deb4d74dd3a3083a3f01 Author: Rahul Joshi <rjoshi@nvidia.com> Date: Mon Sep 9 11:35:13 2024 -0700 [NFC][TableGen] DirectiveEmitter code cleanup (#107775) Eliminate unnecessary llvm:: prefix as this code is in llvm namespace. Use ArrayRef<> instead of std::vector references when appropriate. Use .empty() instead of .size() == 0. commit 99ea357f7b5e7e01e42b8d68dd211dc304b3115b Author: Aiden Grossman <aidengrossman@google.com> Date: Mon Sep 9 11:34:53 2024 -0700 [MLGO] Fix logging verbosity in scripts (#107818) This patch fixes issues related to logging verbosity in the MLGO python scripts. This was an oversight when converting from absl.logging to the python logging API as absl natively supports a --verbosity flag to set the desired logging level. This patch adds a flag to support similar functionality in Python's logging library and additionally updates docstrings where relevant to point to the new values. commit a7c26aaf2eca61cd5d885194872471c63d68f3bc Author: Zequan Wu <zequanwu@google.com> Date: Mon Sep 9 11:34:13 2024 -0700 Revert "[Coverage] Ignore unused functions if the count is 0." 
(#107901) Reverts llvm/llvm-project#107661 Breaks llvm-project/llvm/unittests/ProfileData/CoverageMappingTest.cpp commit 02fff933d0eff71db8ff44f4acf1641bb1ad4d38 Author: Aiden Grossman <aidengrossman@google.com> Date: Mon Sep 9 18:28:23 2024 +0000 [MLGO] Remove unused imports Remove unused imports from python files in the MLGO library. commit 048e46ad53bedef076df868524f0a15eb7cbd38c Author: Brian Cain <bcain@quicinc.com> Date: Mon Sep 9 13:27:13 2024 -0500 [clang, hexagon] Update copyright, license text (#107161) When this file was first contributed - `28b01c59c93d ([hexagon] Add {hvx,}hexagon_{protos,circ_brev...}, 2021-06-30)` - I incorrectly included a QuIC copyright statement with "All rights reserved". I should have contributed this file with the `Apache+LLVM exception` license. commit b1b9b7b853fc4301aedd9ad6b7c22b75f5546b94 Author: Eduard Satdarov <sath@yandex-team.ru> Date: Mon Sep 9 21:17:53 2024 +0300 [libc++] Cache file attributes during directory iteration (#93316) This patch adds caching of file attributes during directory iteration on Windows. This improves the performance when working with files being iterated on in a directory. commit 09b231cb38755e1bd122dbab9c57c4847bf64204 Author: Mingming Liu <mingmingl@google.com> Date: Mon Sep 9 11:16:58 2024 -0700 Re-apply "[NFCI][LTO][lld] Optimize away symbol copies within LTO global resolution in ELF" (#107792) Fix the use-after-free bug and re-apply https://github.com/llvm/llvm-project/pull/106193 * Without the fix, the string referenced by `objSym.Name` could be destroyed even if string saver keeps a copy of the referenced string. This caused use-after-free. * The fix ([latest commit](https://github.com/llvm/llvm-project/pull/107792/commits/9776ed44cfb26172480145aed8f59ba78a6fa2ea)) updates `objSym.Name` to reference (via `StringRef`) the string saver's copy. Test: 1. For `lld/test/ELF/lto/asmundef.ll`, its test failure is reproducible with `-DLLVM_USE_SANITIZER=Address` and gone with the fix. 3. 
Run all tests by following https://github.com/google/sanitizers/wiki/SanitizerBotReproduceBuild#try-local-changes. * Without the fix, `ELF/lto/asmundef.ll` aborted the multi-stage test at `@@@BUILD_STEP stage2/asan_ubsan check@@@`, defined [here](https://github.com/llvm/llvm-zorg/blob/main/zorg/buildbot/builders/sanitizers/buildbot_fast.sh#L30) * With the fix, the [multi-stage test](https://github.com/llvm/llvm-zorg/blob/main/zorg/buildbot/builders/sanitizers/buildbot_fast.sh) pass stage2 {asan, ubsan, masan}. This is also the test used by https://lab.llvm.org/buildbot/#/builders/169 **Original commit message** `StringMap<T>` creates a [copy of the string](https://github.com/llvm/llvm-project/blob/d4c519e7b2ac21350ec08b23eda44bf4a2d3c974/llvm/include/llvm/ADT/StringMapEntry.h#L55-L58) for entry insertions and intentionally keep copies [since the implementation optimizes string memory usage](https://github.com/llvm/llvm-project/blob/d4c519e7b2ac21350ec08b23eda44bf4a2d3c974/llvm/include/llvm/ADT/StringMap.h#L124). On the other hand, linker keeps copies of symbol names [1] in `lld::elf::parseFiles` [2] before invoking `compileBitcodeFiles` [3]. This change proposes to optimize away string copies inside [LTO::GlobalResolutions](https://github.com/llvm/llvm-project/blob/24e791b4164986a1ca7776e3ae0292ef20d20c47/llvm/include/llvm/LTO/LTO.h#L409), which will make LTO indexing more memory efficient for ELF. There are similar opportunities for other (COFF, wasm, MachO) formats. The optimization takes place for lld (ELF) only. For the rest of use cases (gold plugin, `llvm-lto2`, etc), LTO owns a string saver to keep copies and use global resolution key for de-duplication. Together with @kazutakahirata's work to make `ComputeCrossModuleImport` more memory efficient, we see a ~20% peak memory usage reduction in a binary where peak memory usage needs to go down. 
Thanks to the optimization in https://github.com/llvm/llvm-project/commit/329ba523ccbbe68a12434926c92fd9a86494d958, the max (as opposed to the sum) of `ComputeCrossModuleImport` or `GlobalResolution` shows up in peak memory usage. * Regarding correctness, the set of [resolved](https://github.com/llvm/llvm-project/blob/80c47ad3aec9d7f22e1b1bdc88960a91b66f89f1/llvm/lib/LTO/LTO.cpp#L739) [per-module symbols](https://github.com/llvm/llvm-project/blob/80c47ad3aec9d7f22e1b1bdc88960a91b66f89f1/llvm/include/llvm/LTO/LTO.h#L188-L191) is a subset of [llvm::lto::InputFile::Symbols](https://github.com/llvm/llvm-project/blob/80c47ad3aec9d7f22e1b1bdc88960a91b66f89f1/llvm/include/llvm/LTO/LTO.h#L120). And bitcode symbol parsing saves symbol name when iterating `obj->symbols` in `BitcodeFile::parse` already. This change updates `BitcodeFile::parseLazy` to keep copies of per-module undefined symbols. * Presumably the undefined symbols in a LTO unit (copied in this patch in linker unique saver) is a small set compared with the set of symbols in global-resolution (copied before this patch), making this a worthwhile trade-off. Benchmarking this change alone shows measurable memory savings across various benchmarks. [1] ELF https://github.com/llvm/llvm-project/blob/1cea5c2138bef3d8fec75508df6dbb858e6e3560/lld/ELF/InputFiles.cpp#L1748 [2] https://github.com/llvm/llvm-project/blob/ef7b18a53c0d186dcda1e322be6035407fdedb55/lld/ELF/Driver.cpp#L2863 [3] https://github.com/llvm/llvm-project/blob/ef7b18a53c0d186dcda1e322be6035407fdedb55/lld/ELF/Driver.cpp#L2995 commit 277371943fa48f2550df02870951f5e5a77efef5 Author: lntue <35648136+lntue@users.noreply.github.com> Date: Mon Sep 9 14:15:46 2024 -0400 [libc][bazel] Update bazel overlay for math functions and their tests. (#107862) commit 4a501a4556bb191bd6eb5398a7330a28437e5087 Author: Artem Belevich <tra@google.com> Date: Mon Sep 9 11:14:41 2024 -0700 [CUDA/HIP] propagate -cuid to a host-only compilation. 
(#107483) Right now we're bailing out too early, and `-cuid` does not get set for the host-only compilations. commit 6850410562123b6e4fbb039e7ba4a2325b994b84 Author: Zequan Wu <zequanwu@google.com> Date: Mon Sep 9 11:14:21 2024 -0700 [Coverage] Ignore unused functions if the count is 0. (#107661) Relax the condition to ignore the case when count is 0. This fixes a bug on https://github.com/llvm/llvm-project/commit/381e9d2386facea7f2acc0f8c16a6d0731267f80. This was reported at https://discourse.llvm.org/t/coverage-from-multiple-test-executables/81024/. commit 5f74671c85877e03622e8d308aee15ed73ccee7c Author: Tarun Prabhu <tarun@lanl.gov> Date: Mon Sep 9 12:10:16 2024 -0600 [flang][Driver] Support -Xlinker in flang (#107472) Partially addresses: https://github.com/llvm/llvm-project/issues/89888 commit 0f349b7a9cde0080e626f6cfd362885341eb63b4 Author: Sarah Spall <spall@users.noreply.github.com> Date: Mon Sep 9 11:07:20 2024 -0700 [HLSL] Implement support for HLSL intrinsic - select (#107129) Implement support for HLSL intrinsic select. This would close issue #75377 commit 34e3007c69eb91c16f23f20548305a2fb8feb75e Author: Kazu Hirata <kazu@google.com> Date: Mon Sep 9 10:51:52 2024 -0700 [ARM] Fix a warning This patch fixes: llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h:214:5: error: default label in switch which covers all enumeration values [-Werror,-Wcovered-switch-default] commit 6cc0138ca3dbdb21f4c4a5fa39cf05c38da4bb75 Author: Chris B <chris.bieneman@me.com> Date: Mon Sep 9 12:34:50 2024 -0500 Fix implicit conversion rank ordering (#106811) DXC prefers dimension-preserving conversions over precision-losing conversions. This means a double4 -> float4 conversion is preferred over a double4 -> double3 or double4 -> double conversion. 
commit cd8229bb4bfa4de45528ce101d9dceb9be8bff9e Author: Valentin Clement (バレンタインクレメン) <clementval@gmail.com> Date: Mon Sep 9 10:32:35 2024 -0700 [flang][cuda] Support c_devptr in c_f_pointer intrinsic (#107470) This is an extension of CUDA Fortran. The iso_c_binding intrinsic can accept a `TYPE(c_devptr)` as its first argument. This patch relaxes the semantic check to accept it and updates the lowering to unwrap the cptr field from the c_devptr. commit 7543d09b852695187d08aa5d56d50016fea8f706 Author: Andrew Ng <andrew.ng@sony.com> Date: Mon Sep 9 18:18:41 2024 +0100 [llvm-ml] Fix RIP-relative addressing for ptr operands (#107618) Fixes #54773 commit 7f90479b2300b3758fd90015a2e6e7e94cfcf1e7 Author: Leandro Lupori <leandro.lupori@linaro.org> Date: Mon Sep 9 14:09:45 2024 -0300 [flang][OpenMP] Don't abort when default is used on an invalid directive (#107586) The previous assert was not considering programs with semantic errors. Fixes https://github.com/llvm/llvm-project/issues/107495 Fixes https://github.com/llvm/llvm-project/issues/93437 commit 95831f012d76558fe78f5f3e71b1003a773384e5 Author: David Green <david.green@arm.com> Date: Mon Sep 9 18:04:38 2024 +0100 [ARM] Add a default unreachable case to AddrModeToString. NFC Fixes #107739 commit c36c462cc719d47aa2408bca91a028300b2be6d4 Author: Kazu Hirata <kazu@google.com> Date: Mon Sep 9 09:44:37 2024 -0700 [LTO] Simplify calculateCallGraphRoot (NFC) (#107765) The function returns an instance of FunctionSummary populated by calculateCallGraphRoot regardless of whether Edges is empty or not. commit 7d371725cdf993d16f6debf74cf740c3aea84f9b Author: Mingming Liu <mingmingl@google.com> Date: Mon Sep 9 09:43:47 2024 -0700 [NFCI][BitcodeReader]Read real GUID from VI as opposed to storing it in map (#107735) Currently, `ValueIdToValueInfoMap` [1] stores `std::tuple<ValueInfo, GlobalValue::GUID /* original GUID */, GlobalValue::GUID /* real GUID*/ >`. 
This change updates the stored value type to `std::pair<ValueInfo, GlobalValue::GUID /* original GUID */>`, and reads real GUID from ValueInfo. When an entry is inserted into `ValueIdToValueInfoMap`, ValueInfo is created or inserted using real GUID [2]. ValueInfo keeps a pointer to GlobalValueMap [3], using either `GUID` or `{GUID, Name}` [4] when reading per-module summaries to create a combined summary. [1] owned by per module-summary bitcode reader https://github.com/llvm/llvm-project/blob/caebb4562ce634a22f7b13480b19cffc2a6a6730/llvm/lib/Bitcode/Reader/BitcodeReader.cpp#L947-L950 [2] [first](https://github.com/llvm/llvm-project/blob/caebb4562ce634a22f7b13480b19cffc2a6a6730/llvm/lib/Bitcode/Reader/BitcodeReader.cpp#L7130-L7133), [second](https://github.com/llvm/llvm-project/blob/caebb4562ce634a22f7b13480b19cffc2a6a6730/llvm/lib/Bitcode/Reader/BitcodeReader.cpp#L7221-L7222), [third](https://github.com/llvm/llvm-project/blob/caebb4562ce634a22f7b13480b19cffc2a6a6730/llvm/lib/Bitcode/Reader/BitcodeReader.cpp#L7622-L7623) [3] https://github.com/llvm/llvm-project/blob/caebb4562ce634a22f7b13480b19cffc2a6a6730/llvm/include/llvm/IR/ModuleSummaryIndex.h#L1427-L1431 [4] https://github.com/llvm/llvm-project/blob/caebb4562ce634a22f7b13480b19cffc2a6a6730/llvm/include/llvm/IR/ModuleSummaryIndex.h#L1631 and https://github.com/llvm/llvm-project/blob/caebb4562ce634a22f7b13480b19cffc2a6a6730/llvm/include/llvm/IR/ModuleSummaryIndex.h#L1621 --------- Co-authored-by: Kazu Hirata <kazu@google.com> commit 60f052edc66a5b5b346635656f231930c436a008 Author: Petr Hosek <phosek@google.com> Date: Mon Sep 9 09:43:02 2024 -0700 [CMake] Passthrough variables for packages to subbuilds (#107611) These packages are imported by LLVMConfig.cmake and so we should be passing through the necessary variables from the parent build into the subbuilds. We use `CMAKE_CACHE_DEFAULT_ARGS` so subbuilds can override these variables if needed. 
commit 5c8fd1eece8fff69871cef57a2363dc0f734a7d1 Author: Sam Clegg <sbc@chromium.org> Date: Mon Sep 9 09:28:08 2024 -0700 [lld][WebAssembly] Fix use of uninitialized stack data with --wasm64 (#107780) In the case of `--wasm64` we were setting the type of the init expression to be 64-bit but were only setting the low 32-bits of the value (by assigning to Int32). Fixes: https://github.com/emscripten-core/emscripten/issues/22538 commit 95753ffa49f57c284a4682a8ca03e05d59f2c112 Author: LLVM GN Syncbot <llvmgnsyncbot@gmail.com> Date: Mon Sep 9 16:13:05 2024 +0000 [gn build] Port ea2da571c761 commit db6051dae085c35020c1273ae8d38508c9958bc7 Author: Pavel Skripkin <paskripkin@gmail.com> Date: Mon Sep 9 19:12:38 2024 +0300 [analyzer] fix crash on binding to symbolic region with `void *` type (#107572) As reported in https://github.com/llvm/llvm-project/pull/103714#issuecomment-2295769193. CSA crashes on trying to bind value to symbolic region with `void *`. This happens when such region gets passed as inline asm input and engine tries to bind `UnknownVal` to that region. Fix it by changing type from void to char before calling `GetElementZeroRegion` commit 3cdb30ebbc18fa894d3bd67aebcff76ce7c741ac Author: Krystian Stasiowski <sdkrystian@gmail.com> Date: Mon Sep 9 12:06:45 2024 -0400 [Clang][Sema] Use the correct lookup context when building overloaded 'operator->' in the current instantiation (#104458) Currently, clang erroneously rejects the following: ``` struct A { template<typename T> void f(); }; template<typename T> struct B { void g() { (*this)->template f<int>(); // error: no member named 'f' in 'B<T>' } A* operator->(); }; ``` This happens because `Sema::ActOnStartCXXMemberReference` does not adjust the `ObjectType` parameter when `ObjectType` is a dependent type (except when the type is a `PointerType` and the class member access is the `->` form). 
Since the (possibly adjusted) `ObjectType` parameter (`B<T>` in the above example) is passed to `Parser::ParseOptionalCXXScopeSpecifier`, we end up looking up `f` in `B` rather than `A`. This patch fixes the issue by identifying cases where the type of the object expression `T` is a dependent, non-pointer type and: - `T` is the current instantiation and lookup for `operator->` finds a member of the current instantiation, or - `T` has at least one dependent base class, and `operator->` is not found in the current instantiation and using `ASTContext::DependentTy` as the type of the object expression when the optional _nested-name-specifier_ is parsed. Fixes #104268. commit eba6160deec5a32e4b31c2a446172d0e388195c9 Author: Tarun Prabhu <tarun@lanl.gov> Date: Mon Sep 9 09:57:49 2024 -0600 [flang][Driver] Support --no-warnings option (#107455) Because of the way visibility is implemented in Options.td, options that are aliases do not inherit the visibility of the option being aliased. Therefore, explicitly set the visibility of the alias to be the same as the aliased option. This partially addresses https://github.com/llvm/llvm-project/issues/89888 commit 914ab366c24cf494a798ce3a178686456731861a Author: sstipanovic <146831748+sstipanovic@users.noreply.github.com> Date: Mon Sep 9 17:54:30 2024 +0200 [AMDGPU] Overload image atomic swap to allow float as well. (#107283) LLPC can generate llvm.amdgcn.image.atomic.swap intrinsic with data argument as float type as well as float return type. This went unnoticed until CreateIntrinsic with implicit mangling was used. 
commit ea2da571c761066542f8d2273933d2523279e631 Author: Tyler Nowicki <tyler.nowicki@amd.com> Date: Mon Sep 9 11:50:27 2024 -0400 [Coroutines] Move the SuspendCrossingInfo analysis helper into its own header/source (#106306) * Move the SuspendCrossingInfo analysis helper into its own header/source See RFC for more info: https://discourse.llvm.org/t/rfc-abi-objects-for-coroutines/81057 Co-authored-by: tnowicki <tnowicki.nowicki@amd.com> commit 1651014960b90bd1398f61bec0866d4a187910ef Author: Rahul Joshi <rjoshi@nvidia.com> Date: Mon Sep 9 08:47:42 2024 -0700 [TableGen] Change SetTheory set/vec to use const Record * (#107692) Change SetTheory::RecSet/RecVec to use const Record pointers. commit e46f03bc31a61a903416f1d3c68063ab75aebe6e Author: Teresa Johnson <tejohnson@google.com> Date: Mon Sep 9 08:17:41 2024 -0700 [MemProf] Remove unnecessary data structure (NFC) (#107643) Recent change #106623 added the CallToFunc map, but I subsequently realized the same information is already available for the calls being examined in the StackIdToMatchingCalls map we're iterating through. commit 86e5c5468ae3fcd65b23fd7b3cb0182e676829bd Author: Nicolas van Kempen <nvankemp@gmail.com> Date: Mon Sep 9 11:15:28 2024 -0400 [clang-tidy][run-clang-tidy] Fix minor shutdown noise (#105724) On my new machine, the script outputs some shutdown noise: ``` Ctrl-C detected, goodbye. 
Traceback (most recent call last): File "/home/nvankempen/llvm-project/./clang-tools-extra/clang-tidy/tool/run-clang-tidy.py", line 626, in <module> asyncio.run(main()) File "/usr/lib/python3.10/asyncio/runners.py", line 44, in run return loop.run_until_complete(main) File "/usr/lib/python3.10/asyncio/base_events.py", line 636, in run_until_complete self.run_forever() File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever self._run_once() File "/usr/lib/python3.10/asyncio/base_events.py", line 1871, in _run_once event_list = self._selector.select(timeout) File "/usr/lib/python3.10/selectors.py", line 469, in select fd_event_list = self._selector.poll(timeout, max_ev) KeyboardInterrupt ``` This fixes it. Also remove an unused typing import. Relevant documentation: https://docs.python.org/3/library/asyncio-runner.html#handling-keyboard-interruption commit 763bc9249cf0b7da421182e24716d9a569fb5184 Author: Jakub Kuderski <jakub@nod-labs.com> Date: Mon Sep 9 11:12:26 2024 -0400 [mlir][amdgpu] Align Chipset with TargetParser (#107720) Update the Chipset struct to follow the `IsaVersion` definition from llvm's `TargetParser`. This is a follow up to https://github.com/llvm/llvm-project/pull/106169#discussion_r1733955012. * Add the stepping version. Note: This may break downstream code that compares against the minor version directly. * Use comparisons with full Chipset version where possible. Note that we can't use the code in `TargetParser` directly because the chipset utility is outside of `mlir/Target` that re-exports llvm's target library. commit 6cc3bf7d1d343f910b40cee24d4cda873a6ddd55 Author: Quinn Dawkins <quinn.dawkins@gmail.com> Date: Mon Sep 9 11:05:37 2024 -0400 [mlir][tensor] Add canonicalization to fold consecutive tensor.pad ops (#107302) `tensor.pad(tensor.pad)` with the same constant padding value can be combined into a single pad that pads to the sum of the high and low padding amounts. 
commit ea9204505cf1099b98b1fdcb898f0bd35e463984 Author: Lei Huang <lei@ca.ibm.com> Date: Mon Sep 9 11:01:22 2024 -0400 Fix codegen for transparent_union function params (#104816) Update codegen for func param with transparent_union attr to be that of the first union member. This is a followup to #101738 to fix non-ppc codegen and closes #76773. commit 6634d44e5e6079e19efe54c2de35e2e63108b085 Author: Amy Wang <kai.ting.wang@huawei.com> Date: Mon Sep 9 10:57:13 2024 -0400 [MLIR][Transform] Allow stateInitializer and stateExporter for applyTransforms (#101186) This is discussed in RFC: https://discourse.llvm.org/t/rfc-making-the-constructor-of-the-transformstate-class-protected/80377 commit 111932d5cae0199d9c59669b37232a011f8b8757 Author: Luke Lau <luke@igalia.com> Date: Mon Sep 9 22:45:44 2024 +0800 [RISCV] Fix same mask vmerge peephole discarding false operand (#107827) This fixes the issue raised in https://github.com/llvm/llvm-project/pull/106108#discussion_r1749677510 True's passthru needs to be equivalent to vmerge's false, but we also allow true's passthru to be undef. However if it's undef then we need to replace it with false, otherwise we end up discarding the false operand entirely. The changes in fixed-vectors-strided-load-store-asm.ll undo the changes in #106108 where we introduced this miscompile. commit 2d338bed00b2bba713bceb4915400063b95929b2 Author: Tobias Stadler <mail@stadler-tobias.de> Date: Mon Sep 9 16:30:44 2024 +0200 [CodeGen] Refactor DeadMIElim isDead and GISel isTriviallyDead (#105956) Merge GlobalISel's isTriviallyDead and DeadMachineInstructionElim's isDead code and remove all unnecessary checks from the hot path by looping over the operands before doing any other checks. See #105950 for why DeadMIElim needs to remove LIFETIME markers even though they probably shouldn't generally be considered dead. 
x86 CTMark O3: -0.1% AArch64 GlobalISel CTMark O0: -0.6%, O2: -0.2% commit a2f659c1349cb70c09b183eb214e2a24cf04c2c6 Author: Kazu Hirata <kazu@google.com> Date: Mon Sep 9 07:15:12 2024 -0700 [StructurizeCFG] Avoid repeated hash lookups (NFC) (#107797) commit ab95ed5ce0b099913eb5c9b03fef7f322c24acd2 Author: Kazu Hirata <kazu@google.com> Date: Mon Sep 9 07:14:40 2024 -0700 [IPO] Avoid repeated hash lookups (NFC) (#107796) commit 3940a1ba1454afec916be86385bb2031526e3e13 Author: Kazu Hirata <kazu@google.com> Date: Mon Sep 9 07:13:52 2024 -0700 [Float2Int] Avoid repeated hash lookups (NFC) (#107795) commit 563dc226fe17f7638d02a957d1b2870dfa968f01 Author: Kazu Hirata <kazu@google.com> Date: Mon Sep 9 07:13:27 2024 -0700 [Analysis] Avoid repeated hash lookups (NFC) (#107794) commit 620b8d994b8abdcf31271d9f4db7e7422fc9bd65 Author: Samuel Thibault <samuel.thibault@ens-lyon.org> Date: Mon Sep 9 15:53:33 2024 +0200 [hurd] Fix accessing f_type field of statvfs (#71851) f4719c4d2cda ("Add support for GNU Hurd in Path.inc and other places") made llvm use an internal __f_type name for the f_type field (which it is not supposed to since accessing double-underscore names is explicitly not supported by standards). In glibc 2.39 this field was renamed to f_type so application can now access the field as the standard says. commit eaac4a26136ca8e3633bf91795343cd060d7af87 Author: Pierre van Houtryve <pierre.vanhoutryve@amd.com> Date: Mon Sep 9 15:35:28 2024 +0200 [AMDGPU] Document & Finalize GFX12 Memory Model (#98599) Documents the memory model implemented as of #98591, with some fixes/optimizations to the implementation. commit 1a5a1e97817c9a3db4d1f9795789c99790cf88e2 Author: Florian Hahn <flo@fhahn.com> Date: Mon Sep 9 14:26:08 2024 +0100 [VPlan] Assert that VFxUF is always used. Add assertion to ensure invariant discussed in https://github.com/llvm/llvm-project/pull/95305. 
commit 1f2a634c44dedef11f590956f297b2c7a1659fcf Author: Sergey Kachkov <sergey.kachkov@syntacore.com> Date: Wed Sep 4 17:42:03 2024 +0300 Reland "[LSR] Do not create duplicated PHI nodes while preserving LCSSA form" (#107380) Motivating example: https://godbolt.org/z/eb97zrxhx Here we have 2 induction variables in the loop: one is corresponding to i variable (add rdx, 4), the other - to res (add rax, 2). The second induction variable can be removed by rewriteLoopExitValues() method (final value of res at loop exit is unroll_iter * -2); however, this doesn't happen because we have duplicated LCSSA phi nodes at loop exit: ``` ; Preheader: for.body.preheader.new: ; preds = %for.body.preheader %unroll_iter = and i64 %N, -4 br label %for.body ; Loop: for.body: ; preds = %for.body, %for.body.preheader.new %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 0, %for.body.preheader.new ] %i.07 = phi i64 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %inc.3 = add nuw i64 %i.07, 4 %lsr.iv.next = add nsw i64 %lsr.iv, -2 %niter.ncmp.3.not = icmp eq i64 %unroll_iter, %inc.3 br i1 %niter.ncmp.3.not, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !llvm.loop !7 ; Exit blocks for.end.loopexit.unr-lcssa.loopexit: ; preds = %for.body %inc.3.lcssa = phi i64 [ %inc.3, %for.body ] %lsr.iv.next.lcssa11 = phi i64 [ %lsr.iv.next, %for.body ] %lsr.iv.next.lcssa = phi i64 [ %lsr.iv.next, %for.body ] br label %for.end.loopexit.unr-lcssa ``` rewriteLoopExitValues requires %lsr.iv.next value to have only 2 uses: one in LCSSA phi node, the other - in induction phi node. Here we have 3 uses of this value because of duplicated lcssa nodes, so the transform doesn't apply and leads to an extra add operation inside the loop. The proposed solution is to accumulate inserted instructions that will require LCSSA form update into SetVector and then call formLCSSAForInstructions for this SetVector once, so the same instructions don't process twice. 
Reland fixes the issue with preserve-lcssa.ll test: it fails in the situation when x86_64-unknown-linux-gnu target is unavailable in opt. The changes are moved into separate duplicated-phis.ll test with explicit x86 target requirement to fix bots which are not building this target. commit 17f0c5dfaab8bc72e19cb68e73b0944e5ee27b88 Author: Sergey Kachkov <sergey.kachkov@syntacore.com> Date: Fri Aug 30 16:00:42 2024 +0300 [LSR][NFC] Add pre-commit test commit aa158bf40285925d3c019d9e697cd2c88421297a Author: Florian Hahn <flo@fhahn.com> Date: Mon Sep 9 14:10:12 2024 +0100 [LV] Update tests to replace some code with loop varying instructions. Update some tests with loop-invariant instructions, where hoisting them out of the loop changes the vectorization decision. This should preserve their original spirit when making further improvements. commit e25eb1433110d94d16fd69e5aca9bdf72259263d Author: Florian Hahn <flo@fhahn.com> Date: Mon Sep 9 13:05:54 2024 +0100 [ConstraintElim] Add tests for loops with chained header conditions. commit 1199e5b9ce5a001445463ba8da1f70fa4558fbcc Author: Nikita Popov <npopov@redhat.com> Date: Mon Sep 9 12:45:48 2024 +0200 [MemCpyOpt] Add more tests for memcpy passed to readonly arg (NFC) commit cf8fb4320f1be29c55909adf5ff8ad47e02b2dbe Author: Momchil Velikov <momchil.velikov@arm.com> Date: Mon Sep 9 13:34:41 2024 +0100 [AArch64] Implement NEON vamin/vamax intrinsics (#99041) This patch implements the intrinsics of the form floatNxM_t vamin[q]_fN(floatNxM_t vn, floatNxM_t vm); floatNxM_t vamax[q]_fN(floatNxM_t vn, floatNxM_t vm); as defined in https://github.com/ARM-software/acle/pull/324 --------- Co-authored-by: Hassnaa Hamdi <hassnaa.hamdi@arm.com> commit 32cef07885e112d05bc2b1c285f40e353d80e18f Author: Rahul Joshi <rjoshi@nvidia.com> Date: Mon Sep 9 05:27:38 2024 -0700 [LLDB][TableGen] Migrate lldb-tblgen to use const RecordKeeper (#107536) Migrate LLDB TableGen backend to use const RecordKeeper. 
This is a part of effort to have better const correctness in TableGen backends: https://discourse.llvm.org/t/psa-planned-changes-to-tablegen-getallderiveddefinitions-api-potential-downstream-breakages/81089 commit cca54e347ac34912cdfb9983533c61836db135e0 Author: Martin Storsjö <martin@martin.st> Date: Mon Sep 9 15:08:19 2024 +0300 Revert "Reapply "[Clang][CWG1815] Support lifetime extension of temporary created by aggregate initialization using a default member initializer" (#97308)" This reverts commit 45c8766973bb3bb73dd8d996231e114dcf45df9f and 049512e39d96995cb373a76cf2d009a86eaf3aab. This change triggers failed asserts on inputs like this: struct a { } constexpr b; class c { public: c(a); }; class B { public: using d = int; struct e { enum { f } g; int h; c i; d j{}; }; }; B::e k{B::e::f, int(), b}; Compiled like this: clang -target x86_64-linux-gnu -c repro.cpp clang: ../../clang/lib/CodeGen/CGExpr.cpp:3105: clang::CodeGen::LValue clang::CodeGen::CodeGenFunction::EmitDeclRefLValue(const clang::DeclRefExpr*): Assertion `(ND->isUsed(false) || !isa<VarDecl>(ND) || E->isNonOdrUse() || !E->getLocation().isValid()) && "Should not use decl without marking it used!"' failed. commit 7a930ce327fdbc5c77b50ee6304645084100c037 Author: Jeremy Morse <jeremy.morse@sony.com> Date: Mon Sep 9 12:54:45 2024 +0100 [DWARF] Emit a minimal line-table for totally empty functions (#107267) In degenerate but legal inputs, we can have functions that have no source locations at all -- all the DebugLocs attached to instructions are empty. LLVM didn't produce any source location for the function; with this patch it will at least emit the function-scope source location. Demonstrated by empty-line-info.ll The XCOFF test modified has similar symptoms -- with this patch, the size of the ".dwline" section grows a bit, thus shifting some of the file internal offsets, which I've updated. 
commit 959d84044a70da08923fe221f999f4e406094ee9 Author: pvanhout <pierre.vanhoutryve@amd.com> Date: Mon Sep 9 13:50:48 2024 +0200 [AMDGPU] Remove unused SplitGraph::Node::getFullCost commit b8b8fbe19dea2825b801c4738ff78dbf26aae430 Author: Rahul Joshi <rjoshi@nvidia.com> Date: Mon Sep 9 04:18:55 2024 -0700 [NFC][TableGen] Migrate LLVM Attribute Emitter to const RecordKeeper (#107698) Migrate LLVM Attribute Emitter to const RecordKeeper. commit d84d9559bdc7aeb4ce14c251f6a3490c66db8d3a Author: Nicolas van Kempen <nvankemp@gmail.com> Date: Mon Sep 9 07:12:46 2024 -0400 [clang][analyzer] Fix #embed crash (#107764) Fix #107724. commit 09c00b6f0463f6936be5d2100f9d47c0077700f8 Author: Benjamin Kramer <benny.kra@googlemail.com> Date: Mon Sep 9 13:03:38 2024 +0200 [bazel] Add missing dependencies for 345cc47ba7a28811ae4ec7d113059ccb39c500a3 commit 049512e39d96995cb373a76cf2d009a86eaf3aab Author: yronglin <yronglin777@gmail.com> Date: Mon Sep 9 19:01:11 2024 +0800 [NFC][clang] Fix clang version in the test for the implementation of cwg1815 (#107838) This PR fixes the clang version in https://github.com/llvm/llvm-project/pull/97308 . Signed-off-by: yronglin <yronglin777@gmail.com> commit 345cc47ba7a28811ae4ec7d113059ccb39c500a3 Author: Daniil Fukalov <dfukalov@gmail.com> Date: Mon Sep 9 12:44:03 2024 +0200 [NFC] Add explicit #include llvm-config.h where its macros are used, lldb part. (#107603) (this is the lldb part) Without these explicit includes, removing other headers, which implicitly include llvm-config.h, may have non-trivial side effects. For example, `clangd` may report even `llvm-config.h` as "not used" in case it defines a macro that is explicitly used with #ifdef. It is actually amplified with different build configs which use different sets of macros. 
commit dbd81ba2e85c2f244f22c983d96a106eae65c06a Author: Mikhail Goncharov <goncharov.mikhail@gmail.com> Date: Mon Sep 9 11:47:47 2024 +0200 complete rename of __orc_rt namespace for 3e04ad428313dde40c779af6d675b162e150125e it's bizarre that none of the buildbots were broken, only bazel build https://buildkite.com/llvm-project/upstream-bazel/builds/109623#0191d5d0-2b3e-4ee7-b8dd-1e2580977e9b commit 663e9cec9c96169aa4e72ab9b6bf08b2d6603093 Author: Artem Kroviakov <71938912+akroviakov@users.noreply.github.com> Date: Mon Sep 9 11:49:16 2024 +0200 [Func][GPU] Use SymbolUserOpInterface in func::ConstantOp (#107748) This PR enables `func::ConstantOp` creation and usage for device functions inside GPU modules. The current main returns error for referencing device functions via `func::ConstantOp`, because during the `ConstantOp` verification it only checks symbols in `ModuleOp` symbol table, which, of course, does not contain device functions that are defined in `GPUModuleOp`. This PR proposes a more general solution. Co-authored-by: Artem Kroviakov <artem.kroviakov@tum.de> commit aa21ce4a792c170074193c32e8ba8dd35e57c628 Author: Jonas Rickert <Jonas.Rickert@amd.com> Date: Mon Sep 9 11:48:13 2024 +0200 [mlir] Do not set lastToken in AsmParser's resetToken function and add a unit test for AsmParsers's locations (#105529) This changes the function `resetToken` to not update `lastToken`. The member `lastToken` is the last token that was consumed by the parser. Resetting the lexer position to a different position does not cause any token to be consumed, so `lastToken` should not be updated. Setting it to `curToken` can cause the scopeLoc.end location of `OperationDefinition` to be off-by-one, pointing to the first token after the operation. 
An example for an operation for which the scopeLoc.end location was wrong before is: ``` %0 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xbf16>) : !torch.vtensor<[768],bf16> ``` Here the scope end loc always pointed to the next token. This also adds a test for the Locations of `OperationDefinitions`. Without the change to `resetToken` the test fails, with the scope end location for `llvm.mlir.undef` pointing to the `func.return` in the next line. commit b98aa6fb1d5f5fa904ce6d789a8fa4a245a90ee6 Author: Simon Pilgrim <llvm-dev@redking.me.uk> Date: Mon Sep 9 10:29:04 2024 +0100 [X86] LowerABD - lower i8/i16 cases directly to CMOV(SUB(X,Y),SUB(Y,X)) pattern Better codegen (shorter dependency chain for better ILP) than via the TRUNC(ABS(SUB(EXT(LHS),EXT(RHS)))) expansion commit d57be195e37f9c11a26e8e3fe8da5ef62bb921af Author: Lukacma <Marian.Lukac@arm.com> Date: Mon Sep 9 10:28:01 2024 +0100 [AArch64] replace SVE intrinsics with no active lanes with zero (#107413) This patch extends https://github.com/llvm/llvm-project/pull/73964 and optimises SVE intrinsics into zero constants when predicate is zero. commit 476b1a661f6846537d232e9a3bc5a68c5f15efb3 Author: Jerry-Ge <jerry.ge@arm.com> Date: Mon Sep 9 02:26:39 2024 -0700 [TOSA] Update input name for Sin and Cos operators (#107606) Update the dialect input names from input to input1 for Sin/Cos for consistency. Signed-off-by: Jerry Ge <jerry.ge@arm.com> commit da11ede57d034767a6f5d5e211c06c1c3089d7fd Author: vabridgers <58314289+vabridgers@users.noreply.github.com> Date: Mon Sep 9 03:47:39 2024 -0500 [analyzer] Remove overzealous "No dispatcher registered" assertion (#107294) Random testing revealed it's possible to crash the analyzer with the command line invocation: clang -cc1 -analyze -analyzer-checker=nullability empty.c where the source file, empty.c is an empty source file. 
``` clang: <root>/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp:56: void clang::ento::CheckerManager::finishedCheckerRegistration(): Assertion `Event.second.HasDispatcher && "No dispatcher registered for an event"' failed. PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ Stack dump: 0. Program arguments: clang -cc1 -analyze -analyzer-checker=nullability nullability-nocrash.c ... clang::AnalyzerOptions&, clang::Preprocessor const&, llvm::ArrayRef<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>, llvm::ArrayRef<std::function<void (clang::ento::CheckerRegistry&)>>) ``` This commit removes the assertion which failed here, because it was logically incorrect: it required that if an Event is handled by some (enabled) checker, then there must be an **enabled** checker which can emit that kind of Event. It should be OK to disable the event-producing checkers but enable an event-consuming checker which has different responsibilities in addition to handling the events. Note that this assertion was in an `#ifndef NDEBUG` block, so this change does not impact the non-debug builds. Co-authored-by: Vince Bridgers <vince.a.bridgers@ericsson.com> commit 04742f34b343af87dda93edacbb06f6e98a1d80f Author: Nikita Popov <npopov@redhat.com> Date: Mon Sep 9 10:24:54 2024 +0200 [SCCP] Add test for nonnull argument inference (NFC) commit 3b1146e050657f40954e8e1f977837f884df2488 Author: Aiden Grossman <aidengrossman@google.com> Date: Mon Sep 9 01:27:22 2024 -0700 [llvm-exegesis] Use MCRegister instead of unsigned to hold registers (#107820) commit 74ad2540523ec78122ba5a32e35e0b65ee27b7b3 Author: Aiden Grossman <aidengrossman@google.com> Date: Mon Sep 9 08:10:11 2024 +0000 [Github][MLGO] Fix mlgo-utils path in new-prs-labeler This patch (hopefully) fixes the mlgo-utils path in new-prs-labeler so that it actually matches all files in that directory. 
Currently it is not catching the files as they are relatively deeply nested within the folder. commit 3e04ad428313dde40c779af6d675b162e150125e Author: Lang Hames <lhames@gmail.com> Date: Mon Sep 9 17:59:47 2024 +1000 [ORC-RT] Remove double underscore from the orc_rt namespace. We should use `orc_rt` as the public C++ API namespace for the ORC runtime and control symbol visibility to hide implementation details, rather than rely on the '__' prefix. commit d5f6f30664ed53ef27d949fad0ce3994ea9988dd Author: Aiden Grossman <aidengrossman@google.com> Date: Mon Sep 9 07:49:54 2024 +0000 [MLGO] Add spaces at the end of lines in multiline string This patch adds spaces at the end of lines in multiline strings in the extract_ir script. Without this patch, the warning/info messages will be printed without spaces between words when there is a line break in the source which looks/reads weird. commit 8549b324bc1f450f4477f46f18db67439dbf6d75 Author: Younan Zhang <zyn7109@gmail.com> Date: Mon Sep 9 15:09:43 2024 +0800 [Clang] Don't assert non-empty packs for FunctionParmPackExprs (#107561) `FunctionParmPackExpr`s are peculiar in that they have to be of unexpanded dependency while they don't introduce any unexpanded packs. So this patch rules them out in the non-empty pack assertion in `DiagnoseUnexpandedParameterPack()`. There was a fix #69224, but that turned out to be insufficient. I also moved the separate tests to a pre-existing file. Fixes https://github.com/llvm/llvm-project/issues/86361 commit 022b3c27e27832f27c61683095899227c26e0cca Author: Piyou Chen <piyou.chen@sifive.com> Date: Mon Sep 9 15:07:39 2024 +0800 [Clang][RISCV] Recognize unsupport target feature by supporting isValidFeatureName (#106495) This patch makes unsupported target attributes emit a warning and ignore the target attribute during semantic checks. The changes include: 1. Adding the RISCVTargetInfo::isValidFeatureName function. 2. Rejecting non-full-arch strings in the handleFullArchString function. 3. 
Adding test cases to demonstrate the warning behavior. commit 9347b66cfcd9acf84dbbd500b6344041c587f6a9 Author: Pierre van Houtryve <pierre.vanhoutryve@amd.com> Date: Mon Sep 9 09:06:34 2024 +0200 Reland "[AMDGPU] Graph-based Module Splitting Rewrite (#104763)" (#107076) Relands #104763 with - Fixes for EXPENSIVE_CHECKS test failure (due to sorting operator failing if the input is shuffled first) - Fix for broken proposal selection - c3cb27370af40e491446164840766478d3258429 included Original commit description below --- Major rewrite of the AMDGPUSplitModule pass in order to better support it long-term. Highlights: - Removal of the "SML" logging system in favor of just using CL options and LLVM_DEBUG, like any other pass in LLVM. - The SML system started from good intentions, but it was too flawed and messy to be of any real use. It was also a real pain to use and made the code more annoying to maintain. - Graph-based module representation with DOTGraph printing support - The graph represents the module accurately, with bidirectional, typed edges between nodes (a node usually represents one function). - Nodes are assigned IDs starting from 0, which allows us to represent a set of nodes as a BitVector. This makes comparing 2 sets of nodes to find common dependencies a trivial task. Merging two clusters of nodes together is also really trivial. - No more defaulting to "P0" for external calls - Roots that can reach non-copyable dependencies (such as external calls) are now grouped together in a single "cluster" that can go into any partition. - No more defaulting to "P0" for indirect calls - New representation for module splitting proposals that can be graded and compared. - Graph-search algorithm that can explore multiple branches/assignments for a cluster of functions, up to a maximum depth. - With the default max depth of 8, we can create up to 256 propositions to try and find the best one. - We can still fall back to a greedy approach upon reaching max depth. 
That greedy approach uses almost identical heuristics to the previous version of the pass. All of this gives us a lot of room to experiment with new heuristics or even entirely different splitting strategies if we need to. For instance, the graph representation has room for abstract nodes, e.g. if we need to represent some global variables or external constraints. We could also introduce more edge types to model other types of relations between nodes, etc. I also designed the graph representation & the splitting strategies to be as fast as possible, and it seems to have paid off. Some quick tests showed that we spend pretty much all of our time in the CloneModule function, with the actual splitting logic being <1% of the runtime. commit bdcbfa7fb4ac6f23262095c401d28309d689225e Author: LLVM GN Syncbot <llvmgnsyncbot@gmail.com> Date: Mon Sep 9 06:28:13 2024 +0000 [gn build] Port a416267a5f3f commit a416267a5f3fffb3d1e9d8d53245aef8169c5ddb Author: Yuxuan Chen <ych@fb.com> Date: Sun Sep 8 23:09:40 2024 -0700 [LLVM][Coroutines] Transform "coro_elide_safe" calls to switch ABI coroutines to the `noalloc` variant (#99285) This patch is episode three of the middle end implementation for the coroutine HALO improvement project published on discourse: https://discourse.llvm.org/t/language-extension-for-better-more-deterministic-halo-for-c-coroutines/80044 After we attribute the calls to some coroutines as "coro_elide_safe" in the C++ FE and create a `noalloc` ramp function, we use a new middle end pass to move the call to coroutines to the noalloc variant. This pass should be run after CoroSplit. For each node we process in CoroSplit, we look for its callers and replace the attributed ones in presplit coroutines to the noalloc one. The transformed `noalloc` ramp function will also require a frame pointer to a block of memory it can use as an activation frame. We allocate this on the caller's frame with an alloca.
Please note that we cannot safely transform such attributed calls in post-split coroutines due to memory lifetime reasons. The CoroSplit pass is responsible for creating the coroutine frame spills for all the allocas in the coroutine. Therefore it will be unsafe to create new allocas like this one in post-split coroutines. This happens relatively rarely because CGSCC performs the passes on the callees before the caller. However, if multiple coroutines coexist in one SCC, this situation does happen (and prevents us from having potentially unbounded frame size due to recursion.) You can find episode 1: Clang FE of this patch series at https://github.com/llvm/llvm-project/pull/99282 Episode 2: CoroSplit at https://github.com/llvm/llvm-project/pull/99283 commit 234cc81625030e934651d6ae0ace66e37138ba4a Author: Yuxuan Chen <ych@fb.com> Date: Sun Sep 8 23:09:20 2024 -0700 [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (#99283) This patch is episode two of the coroutine HALO improvement project published on discourse: https://discourse.llvm.org/t/language-extension-for-better-more-deterministic-halo-for-c-coroutines/80044 Previously CoroElide depends on inlining, and its analysis does not work very well with code generated by the C++ frontend due to the existence of many customization points. There have been issues reported upstream about how ineffective the original CoroElide was in real world applications. For C++ users, this set of patches aims to fix this problem by providing library authors and users deterministic HALO behaviour for some well-behaved coroutine `Task` types. The stack begins with a library side attribute on the `Task` class that guarantees no unstructured concurrency when coroutines are directly `co_await`ed as a prvalue. This attribute on Task types gives us lifetime guarantees and makes the C++ FE capable of telling the ME which coroutine calls are elidable.
We convey such information from the FE through the attribute `coro_elide_safe`. This patch modifies CoroSplit to create a variant of the coroutine ramp function that 1) does not use a heap allocated frame, and instead takes an additional parameter as the pointer to the frame. Such parameter is attributed with `dereferenceable` and `align` to convey size and alignment requirements for the frame. 2) always stores cleanup instead of destroy address for `coro.destroy()` actions. In a later patch, we will have a new pass that runs right after CoroSplit to find usages of the callee coroutine attributed `coro_elide_safe` in presplit coroutine callers, allocates the frame on its "stack", and transforms those usages to call the `noalloc` ramp function variant. (note I put quotes on the word "stack" here, because for a presplit coroutine, any alloca will be spilled into the frame when it's being split) The C++ Frontend attribute implementation that works with this change can be found at https://github.com/llvm/llvm-project/pull/99282 The pass that makes use of the new `noalloc` split can be found at https://github.com/llvm/llvm-project/pull/99285 commit e17a39bc314f97231e440c9e68d9f46a9c07af6d Author: Yuxuan Chen <ych@fb.com> Date: Sun Sep 8 23:08:58 2024 -0700 [Clang] C++20 Coroutines: Introduce Frontend Attribute [[clang::coro_await_elidable]] (#99282) This patch is the frontend implementation of the coroutine elide improvement project detailed in this discourse post: https://discourse.llvm.org/t/language-extension-for-better-more-deterministic-halo-for-c-coroutines/80044 This patch proposes a C++ struct/class attribute `[[clang::coro_await_elidable]]`. This notion of await elidable task gives developers and library authors a certainty that coroutine heap elision happens in a predictable way. Originally, after we lower a coroutine to LLVM IR, CoroElide is responsible for analysis of whether an elision can happen.
Take this as an example: ``` Task foo(); Task bar() { co_await foo(); } ``` For CoroElide to happen, the ramp function of `foo` must be inlined into `bar`. This inlining happens after `foo` has been split but `bar` is usually still a presplit coroutine. If `foo` is indeed a coroutine, the inlined `coro.id` intrinsics of `foo` is visible within `bar`. CoroElide then runs an analysis to figure out whether the SSA value of `coro.begin()` of `foo` gets destroyed before `bar` terminates. `Task` types are rarely simple enough for the destroy logic of the task to reference the SSA value from `coro.begin()` directly. Hence, the pass is very ineffective for even the most trivial C++ Task types. Improving CoroElide by implementing more powerful analyses is possible, however it doesn't give us the…

lei137 requested review from daltenty, hubert-reinterpretcast, chenzheng1030, mandlebug and ahatanak August 2, 2024 19:12

llvmbot added clang Clang issues not falling into any other category backend:PowerPC clang:codegen labels Aug 2, 2024

lei137 mentioned this pull request Aug 2, 2024

[PowerPC][RISCV] Attribute 'signext' applied to incompatible type! #76773

Closed

hubert-reinterpretcast reviewed Aug 5, 2024

View reviewed changes

clang/lib/CodeGen/ABIInfoImpl.h Outdated Show resolved Hide resolved

clang/lib/CodeGen/Targets/PPC.cpp Outdated Show resolved Hide resolved

lei137 requested a review from hubert-reinterpretcast August 6, 2024 16:33

lei137 self-assigned this Aug 7, 2024

lei137 marked this pull request as draft August 7, 2024 17:32

lei137 marked this pull request as ready for review August 7, 2024 19:34

amy-kwan reviewed Aug 16, 2024

View reviewed changes

lei137 requested a review from amy-kwan August 16, 2024 19:46

efriedma-quic approved these changes Aug 17, 2024

View reviewed changes

hubert-reinterpretcast reviewed Aug 19, 2024

View reviewed changes

clang/test/CodeGen/PowerPC/transparent_union.c Outdated Show resolved Hide resolved

llvm/test/CodeGen/PowerPC/transparent_union.ll Outdated Show resolved Hide resolved

lei137 added 2 commits August 19, 2024 09:50

[PowerPC] Fix codegen for transparent_union function params

c6a8dee

Update codegen for func param with transparent_union attr to be that of the first union member.

apply clang format suggestions

242510d

lei137 added 14 commits August 19, 2024 09:50

add ppc32bit run line

9ca3b7d

add transparent union handling to ppc 32bit arg handling

2cf8131

add support for 32bit enum handling

49d3d02

add enum transparent union clang test

254e16d

add enum IR test

f0ce1bb

add -fshort-enum option to clang tests

761648d

update IR code for short enums

8c57ecb

add IR test for ppc 32bit

a7351d8

move code to generate transparent coerce type to useFirstFieldIfTrans…

d431df9

…parentUnion()

apply clang-format

9f4d9e2

update to use CGT.ConvertType(Ty) instead

0fb7cf8

apply clang-format

a50d3da

apply Hubert's suggestion

54b237e

remove IR test as I have verified Hubert's comment is correct.

87286fb

lei137 force-pushed the lei/transparent_union branch from 388f08a to 87286fb Compare August 19, 2024 14:29

lei137 merged commit f95026d into llvm:main Aug 19, 2024
6 of 8 checks passed

lei137 deleted the lei/transparent_union branch August 19, 2024 16:17

lei137 mentioned this pull request Aug 19, 2024

Fix codegen for transparent_union function params #104816

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[PowerPC] Fix codegen for transparent_union function params #101738

[PowerPC] Fix codegen for transparent_union function params #101738

lei137 commented Aug 2, 2024

llvmbot commented Aug 2, 2024 •

edited

Loading

github-actions bot commented Aug 2, 2024 •

edited

Loading

hubert-reinterpretcast commented Aug 5, 2024 •

edited

Loading

efriedma-quic commented Aug 5, 2024

lei137 commented Aug 5, 2024 •

edited

Loading

efriedma-quic commented Aug 6, 2024

lei137 commented Aug 6, 2024

lei137 commented Aug 6, 2024 •

edited

Loading

lei137 commented Aug 16, 2024 •

edited

Loading

amy-kwan left a comment

lei137 commented Aug 16, 2024

efriedma-quic left a comment

[PowerPC] Fix codegen for transparent_union function params #101738

[PowerPC] Fix codegen for transparent_union function params #101738

Conversation

lei137 commented Aug 2, 2024

llvmbot commented Aug 2, 2024 • edited Loading

github-actions bot commented Aug 2, 2024 • edited Loading

hubert-reinterpretcast commented Aug 5, 2024 • edited Loading

efriedma-quic commented Aug 5, 2024

lei137 commented Aug 5, 2024 • edited Loading

efriedma-quic commented Aug 6, 2024

lei137 commented Aug 6, 2024

lei137 commented Aug 6, 2024 • edited Loading

lei137 commented Aug 16, 2024 • edited Loading

amy-kwan left a comment

Choose a reason for hiding this comment

lei137 commented Aug 16, 2024

efriedma-quic left a comment

Choose a reason for hiding this comment

llvmbot commented Aug 2, 2024 •

edited

Loading

github-actions bot commented Aug 2, 2024 •

edited

Loading

hubert-reinterpretcast commented Aug 5, 2024 •

edited

Loading

lei137 commented Aug 5, 2024 •

edited

Loading

lei137 commented Aug 6, 2024 •

edited

Loading

lei137 commented Aug 16, 2024 •

edited

Loading