From a09bcdf6831ffb18e39ca69bb809b866be5afe88 Mon Sep 17 00:00:00 2001
From: Andrew Adams
Date: Fri, 13 Oct 2023 16:31:44 -0700
Subject: [PATCH 1/2] Generate simpler LLVM IR for shuffles that recursively become broadcasts

---
 src/CodeGen_LLVM.cpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp
index b4fd79d88a15..0e9d2d84b512 100644
--- a/src/CodeGen_LLVM.cpp
+++ b/src/CodeGen_LLVM.cpp
@@ -4062,6 +4062,25 @@ void CodeGen_LLVM::visit(const Shuffle *op) {
         vecs.push_back(codegen(e));
     }
 
+    // Handle simple broadcasts, which can be generated by the recursive calls below.
+    if (op->vectors.size() == 1) {
+        bool all_indices_the_same = true;
+        internal_assert(!op->indices.empty());
+        for (int i : op->indices) {
+            all_indices_the_same &= (i == op->indices[0]);
+        }
+        if (all_indices_the_same) {
+            value = codegen(op->vectors[0]);
+            if (value->getType()->isVectorTy()) {
+                value = builder->CreateExtractElement(value, ConstantInt::get(i32_t, op->indices[0]));
+            } else {
+                internal_assert(op->indices[0] == 0);
+            }
+            value = create_broadcast(value, op->indices.size());
+            return;
+        }
+    }
+
     if (op->is_interleave()) {
         value = interleave_vectors(vecs);
     } else if (op->is_concat()) {

From a7d1b8c37e7bb2ccda353195448d3eca1dbb0a35 Mon Sep 17 00:00:00 2001
From: Andrew Adams
Date: Fri, 13 Oct 2023 16:40:18 -0700
Subject: [PATCH 2/2] Don't re-codegen arg

---
 src/CodeGen_LLVM.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp
index 0e9d2d84b512..610dd3719a57 100644
--- a/src/CodeGen_LLVM.cpp
+++ b/src/CodeGen_LLVM.cpp
@@ -4070,7 +4070,7 @@ void CodeGen_LLVM::visit(const Shuffle *op) {
             all_indices_the_same &= (i == op->indices[0]);
         }
         if (all_indices_the_same) {
-            value = codegen(op->vectors[0]);
+            value = vecs[0];
             if (value->getType()->isVectorTy()) {
                 value = builder->CreateExtractElement(value, ConstantInt::get(i32_t, op->indices[0]));
             } else {