Skip to content

Commit

Permalink
Merge from 'main' to 'sycl-web' (#8)
Browse files Browse the repository at this point in the history
  CONFLICT (content): Merge conflict in clang/lib/Frontend/CompilerInvocation.cpp
  • Loading branch information
AlexeySachkov committed Jan 22, 2021
2 parents 94660ed + a7dcd3a commit 5b74cc3
Show file tree
Hide file tree
Showing 37 changed files with 323 additions and 251 deletions.
17 changes: 10 additions & 7 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2205,12 +2205,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
}
}

if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) {
StringRef Name = A->getValue();
if (Name == "full" || Name == "branch") {
Opts.CFProtectionBranch = 1;
}
}
// -cl-std only applies for OpenCL language standards.
// Override the -std option in this case.
if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) {
Expand All @@ -2233,7 +2227,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
LangStd = OpenCLLangStd;
}

Opts.SYCLIsDevice = Opts.SYCL && Args.hasArg(options::OPT_fsycl_is_device);
if (Opts.SYCL) {
Opts.SYCLIsDevice = Args.hasArg(options::OPT_fsycl_is_device);
Opts.SYCLIsHost = Args.hasArg(options::OPT_fsycl_is_host);
Expand Down Expand Up @@ -2276,6 +2269,15 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,

CompilerInvocation::setLangDefaults(Opts, IK, T, Includes, LangStd);

if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) {
StringRef Name = A->getValue();
if (Name == "full" || Name == "branch") {
Opts.CFProtectionBranch = 1;
}
}

Opts.SYCLIsDevice = Opts.SYCL && Args.hasArg(options::OPT_fsycl_is_device);

// -cl-strict-aliasing needs to emit diagnostic in the case where CL > 1.0.
// This option should be deprecated for CL > 1.0 because
// this option was added for compatibility with OpenCL 1.0.
Expand Down Expand Up @@ -3014,6 +3016,7 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
// PICLevel and PIE are needed during code generation and this should be
// set regardless of the input type.
LangOpts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags);
LangOpts.PIE = Args.hasArg(OPT_pic_is_pie);
parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ),
Diags, LangOpts.Sanitize);
} else {
Expand Down
33 changes: 20 additions & 13 deletions libc/src/string/memory_utils/memcpy_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,28 +72,35 @@ static void CopyBlockOverlap(char *__restrict dst, const char *__restrict src,

// Copies `count` bytes by blocks of `kBlockSize` bytes.
// Copies at the start and end of the buffer are unaligned.
// Copies in the middle of the buffer are aligned to `kBlockSize`.
// Copies in the middle of the buffer are aligned to `kAlignment`.
//
// e.g. with
// [12345678123456781234567812345678]
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
// [__XXXXXXXX______________________]
// [________XXXXXXXX________________]
// [________________XXXXXXXX________]
// [_____________________XXXXXXXX___]
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
// [__XXXX___________________________]
// [_____XXXXXXXX____________________]
// [_____________XXXXXXXX____________]
// [_____________________XXXXXXXX____]
// [______________________XXXXXXXX___]
//
// Precondition: `count > 2 * kBlockSize` for efficiency.
// `count >= kBlockSize` for correctness.
template <size_t kBlockSize>
// Precondition: `kAlignment <= kBlockSize`
// `count > 2 * kBlockSize` for efficiency.
// `count >= kAlignment` for correctness.
template <size_t kBlockSize, size_t kAlignment = kBlockSize>
static void CopyAlignedBlocks(char *__restrict dst, const char *__restrict src,
size_t count) {
CopyBlock<kBlockSize>(dst, src); // Copy first block
static_assert(is_power2(kAlignment), "kAlignment must be a power of two");
static_assert(is_power2(kBlockSize), "kBlockSize must be a power of two");
static_assert(kAlignment <= kBlockSize,
"kAlignment must be less or equal to block size");
CopyBlock<kAlignment>(dst, src); // Copy first block

// Copy aligned blocks
const size_t ofla = offset_from_last_aligned<kBlockSize>(src);
const size_t ofla = offset_from_last_aligned<kAlignment>(src);
const size_t limit = count + ofla - kBlockSize;
for (size_t offset = kBlockSize; offset < limit; offset += kBlockSize)
CopyBlock<kBlockSize>(dst - ofla + offset, src - ofla + offset);
for (size_t offset = kAlignment; offset < limit; offset += kBlockSize)
CopyBlock<kBlockSize>(dst - ofla + offset,
assume_aligned<kAlignment>(src - ofla + offset));

CopyLastBlock<kBlockSize>(dst, src, count); // Copy last block
}
Expand Down
4 changes: 4 additions & 0 deletions libc/src/string/memory_utils/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ static inline intptr_t offset_to_next_cache_line(const void *ptr) {
return offset_to_next_aligned<LLVM_LIBC_CACHELINE_SIZE>(ptr);
}

// Returns `ptr` unchanged, typed as `T *`, after passing it through
// `__builtin_assume_aligned` so the compiler may assume the returned pointer
// is aligned to at least `alignment` bytes.
//
// Template parameters:
//   alignment - the byte alignment the caller promises for `ptr`.
//   T         - pointee type; deduced from the argument.
//
// The caller is responsible for `ptr` really having that alignment: this
// function performs no runtime check.
template <size_t alignment, typename T> static T *assume_aligned(T *ptr) {
  // The builtin hands back a `void *`; `static_cast` is the named cast meant
  // for converting `void *` back to an object pointer type.
  return static_cast<T *>(__builtin_assume_aligned(ptr, alignment));
}

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_MEMORY_UTILS_H
39 changes: 38 additions & 1 deletion libc/test/src/string/memory_utils/memcpy_utils_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,24 @@ TEST(MemcpyUtilsTest, CopyAlignedBlocks) {
EXPECT_STREQ(trace.Read(), "011121111111");
}

TEST(MemcpyUtilsTest, MaxReloads) {
// Exercises CopyAlignedBlocks with kBlockSize = 8 and kAlignment = 4 and
// compares the recorded write and read traces with the expected strings.
// NOTE(review): each trace digit appears to be the number of times the byte at
// that offset was accessed ('2' where two copies overlap) -- confirm against
// the trace helper.
TEST(MemcpyUtilsTest, CopyAlignedBlocksWithAlignment) {
auto &trace = GetTrace();
// count (8) is a multiple of the alignment (4).
// NOTE(review): I(0) is presumably an aligned pointer at offset 0 -- confirm.
// Bytes to copy:
// "11111111"
trace.Clear();
CopyAlignedBlocks<8, 4>(I(0), I(0), 8);
// The first four bytes are expected to be accessed twice, the rest once.
EXPECT_STREQ(trace.Write(), "22221111");
EXPECT_STREQ(trace.Read(), "22221111");

// count (9) is NOT a multiple of the alignment (4).
// Bytes to copy:
// "111111111"
trace.Clear();
CopyAlignedBlocks<8, 4>(I(0), I(0), 9);
// Only bytes 1..3 are expected to be accessed twice here.
EXPECT_STREQ(trace.Write(), "122211111");
EXPECT_STREQ(trace.Read(), "122211111");
}

TEST(MemcpyUtilsTest, CopyAlignedBlocksMaxReloads) {
auto &trace = GetTrace();
for (size_t alignment = 0; alignment < 32; ++alignment) {
for (size_t count = 64; count < 768; ++count) {
Expand All @@ -231,4 +248,24 @@ TEST(MemcpyUtilsTest, MaxReloads) {
}
}

// Sweeps CopyAlignedBlocks<32, 16> (kBlockSize = 32, kAlignment = 16) over
// offsets 0..31 and counts 64..767, checking on the write trace that no byte
// is recorded more than twice.
// NOTE(review): trace characters are presumably per-byte access counts
// ('0' = never touched) -- confirm against the trace helper.
TEST(MemcpyUtilsTest, CopyAlignedBlocksWithAlignmentMaxReloads) {
auto &trace = GetTrace();
// Despite its name, `alignment` is used below as the offset handed to I() for
// the first (dst) argument; the second (src) argument is always I(0).
for (size_t alignment = 0; alignment < 32; ++alignment) {
// count starts at 64, i.e. twice the 32-byte block size.
for (size_t count = 64; count < 768; ++count) {
trace.Clear();
// We should never reload more than twice when copying from count = 2x32.
CopyAlignedBlocks<32, 16>(I(alignment), I(0), count);
const char *const written = trace.Write();
// First bytes are untouched.
for (size_t i = 0; i < alignment; ++i)
EXPECT_EQ(written[i], '0');
// Next bytes are loaded once or twice but no more.
// Only indices in [alignment, count) are checked here.
for (size_t i = alignment; i < count; ++i) {
EXPECT_GE(written[i], '1');
EXPECT_LE(written[i], '2');
}
}
}
}

} // namespace __llvm_libc
26 changes: 26 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20887,6 +20887,32 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
continue;
}

// Last chance - see if the vector is another shuffle and if it
// uses one of the existing candidate shuffle ops.
if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
int InnerIdx = CurrentSVN->getMaskElt(Idx);
if (InnerIdx < 0) {
Mask.push_back(-1);
continue;
}
SDValue InnerVec = (InnerIdx < (int)NumElts)
? CurrentSVN->getOperand(0)
: CurrentSVN->getOperand(1);
if (InnerVec.isUndef()) {
Mask.push_back(-1);
continue;
}
InnerIdx %= NumElts;
if (InnerVec == SV0) {
Mask.push_back(InnerIdx);
continue;
}
if (InnerVec == SV1) {
Mask.push_back(InnerIdx + NumElts);
continue;
}
}

// Bail out if we cannot convert the shuffle pair into a single shuffle.
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Scalar/LoopSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
cl::desc("Do not sink instructions that have too many uses."));

static cl::opt<bool> EnableMSSAInLoopSink(
"enable-mssa-in-loop-sink", cl::Hidden, cl::init(false),
"enable-mssa-in-loop-sink", cl::Hidden, cl::init(true),
cl::desc("Enable MemorySSA for LoopSink in new pass manager"));

static cl::opt<bool> EnableMSSAInLegacyLoopSink(
Expand Down
Loading

0 comments on commit 5b74cc3

Please sign in to comment.