Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stop UB complaints and autovectorization of scalar fletcher4 implementation #13631

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 4 additions & 20 deletions include/zfs_fletcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,19 @@ typedef struct zfs_fletcher_superscalar {
} zfs_fletcher_superscalar_t;

typedef struct zfs_fletcher_sse {
uint64_t v[2] __attribute__((aligned(16)));
uint64_t v[2];
} zfs_fletcher_sse_t;

typedef struct zfs_fletcher_avx {
uint64_t v[4] __attribute__((aligned(32)));
uint64_t v[4];
} zfs_fletcher_avx_t;

typedef struct zfs_fletcher_avx512 {
uint64_t v[8] __attribute__((aligned(64)));
uint64_t v[8];
} zfs_fletcher_avx512_t;

typedef struct zfs_fletcher_aarch64_neon {
uint64_t v[2] __attribute__((aligned(16)));
uint64_t v[2];
} zfs_fletcher_aarch64_neon_t;


Expand Down Expand Up @@ -160,20 +160,4 @@ _ZFS_FLETCHER_H const fletcher_4_ops_t fletcher_4_aarch64_neon_ops;
}
#endif

#if defined(ZFS_UBSAN_ENABLED)
#if defined(__has_attribute)
#if __has_attribute(no_sanitize_undefined)
#define ZFS_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined))
#elif __has_attribute(no_sanitize)
#define ZFS_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined")))
#else
#error "Compiler has to support attribute "
"`no_sanitize_undefined` or `no_sanitize(\"undefined\")`"
"when compiling with UBSan enabled"
#endif /* __has_attribute(no_sanitize_undefined) */
#endif /* defined(__has_attribute) */
#else
#define ZFS_NO_SANITIZE_UNDEFINED
#endif /* defined(ZFS_UBSAN_ENABLED) */

#endif /* _ZFS_FLETCHER_H */
16 changes: 16 additions & 0 deletions lib/libzfs/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,22 @@ libzfs_la_CFLAGS = $(AM_CFLAGS) $(LIBRARY_CFLAGS)
libzfs_la_CFLAGS += $(LIBCRYPTO_CFLAGS) $(ZLIB_CFLAGS)
libzfs_la_CFLAGS += -fvisibility=hidden

# Less dire than if you override this in the kernel, but autovectorizing
# random portions seems like a great way to encounter crashes in places
# that look, on the surface, like there's no SIMD involved...
#
# We would keep -mgeneral-regs-only, but on e.g. x64, things like atof
# can't be defined without implying using FPU regs, so it's a compile
# time error.
NOVECTOR := -fno-tree-vectorize
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this fix a real problem or is it just a precaution? I would rather not disable auto-vectorization in userspace since we expect the compiler to attempt these things in userspace, where using SIMD should always be safe.

That said, I know for a fact that the compiler will sometimes vectorize the mixing matrix calculations. I am not sure if its vectorization makes a difference, but I am inclined to let it try.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#13605 says hello.

$(addprefix module/zcommon/libzfs_la-,zfs_fletcher.$(OBJEXT) zfs_fletcher.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzfs_la-,zfs_fletcher_aarch64_neon.$(OBJEXT) zfs_fletcher_aarch64_neon.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzfs_la-,zfs_fletcher_avx512.$(OBJEXT) zfs_fletcher_avx512.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzfs_la-,zfs_fletcher_intel.$(OBJEXT) zfs_fletcher_intel.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzfs_la-,zfs_fletcher_sse.$(OBJEXT) zfs_fletcher_sse.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzfs_la-,zfs_fletcher_superscalar.$(OBJEXT) zfs_fletcher_superscalar.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzfs_la-,zfs_fletcher_superscalar4.$(OBJEXT) zfs_fletcher_superscalar4.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)

lib_LTLIBRARIES += libzfs.la
CPPCHECKTARGETS += libzfs.la

Expand Down
16 changes: 16 additions & 0 deletions lib/libzpool/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,22 @@ libzpool_la_CPPFLAGS = $(AM_CPPFLAGS) $(FORCEDEBUG_CPPFLAGS)
libzpool_la_CPPFLAGS += -I$(srcdir)/include/os/@ac_system_l@/zfs
libzpool_la_CPPFLAGS += -DLIB_ZPOOL_BUILD

# Less dire than if you override this in the kernel, but autovectorizing
# random portions seems like a great way to encounter crashes in places
# that look, on the surface, like there's no SIMD involved...
#
# We would keep -mgeneral-regs-only, but on e.g. x64, things like atof
# can't be defined without implying using FPU regs, so it's a compile
# time error.
NOVECTOR := -fno-tree-vectorize
$(addprefix module/zcommon/libzpool_la-,zfs_fletcher.$(OBJEXT) zfs_fletcher.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same as above.

$(addprefix module/zcommon/libzpool_la-,zfs_fletcher_aarch64_neon.$(OBJEXT) zfs_fletcher_aarch64_neon.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzpool_la-,zfs_fletcher_avx512.$(OBJEXT) zfs_fletcher_avx512.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzpool_la-,zfs_fletcher_intel.$(OBJEXT) zfs_fletcher_intel.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzpool_la-,zfs_fletcher_sse.$(OBJEXT) zfs_fletcher_sse.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzpool_la-,zfs_fletcher_superscalar.$(OBJEXT) zfs_fletcher_superscalar.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)
$(addprefix module/zcommon/libzpool_la-,zfs_fletcher_superscalar4.$(OBJEXT) zfs_fletcher_superscalar4.l$(OBJEXT)) : CFLAGS += $(NOVECTOR)

lib_LTLIBRARIES += libzpool.la
CPPCHECKTARGETS += libzpool.la

Expand Down
9 changes: 9 additions & 0 deletions module/Kbuild.in
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ ifneq ($(KBUILD_EXTMOD),)
@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
endif

# Woe betide ye who override this, for yours is the kingdom of mysterious segfaults
# absolutely everywhere.
#
# Free warning to anyone considering it - -march=native or the like later in the line
# will undo -mgeneral-regs-only, and gcc's -O2 starting in 12 does autovectorizing.
#
# Good luck.
ZFS_MODULE_CFLAGS += -mgeneral-regs-only -fno-tree-vectorize
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Linux kernel really should handle this for us by disabling all of the vector instructions such that there is no need to pass either -mgeneral-regs-only or -fno-tree-vectorize.

-mgeneral-regs-only is a convenience flag meant to avoid requiring people to know about all various isa extensions. It is not available on all architectures and it will break the build on x86_64 on GCC versions older than gcc 7.

-fno-tree-vectorize is only specified in Linux's arch/csky/Makefile and no where else in Linux, since it really should not be necessary. That said, I am fine with passing -fno-tree-vectorize here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't just add it for fun, no.

As the comment above explains, if the user forcibly overrides -march later in the line, then these are overridden as well, and boom goes the dynamite.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright. -mgeneral-regs-only is not backward compatible with older compilers so unless we write an autotools check for it, we probably should drop it in favor of only doing -fno-tree-vectorize.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to write an autoconf check, yes.

I think -fno-tree-vectorize might also need a check of that nature, but it's been a while since I looked, so I'll check when I get back to this.


asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)

Expand Down
53 changes: 31 additions & 22 deletions module/Makefile.bsd
Original file line number Diff line number Diff line change
Expand Up @@ -423,25 +423,34 @@ CFLAGS.zil.c= -Wno-cast-qual
CFLAGS.zio.c= -Wno-cast-qual
CFLAGS.zrlock.c= -Wno-cast-qual
CFLAGS.zfs_zstd.c= -Wno-cast-qual -Wno-pointer-arith
CFLAGS.entropy_common.c= -fno-tree-vectorize -U__BMI__
CFLAGS.error_private.c= -fno-tree-vectorize -U__BMI__
CFLAGS.fse_decompress.c= -fno-tree-vectorize -U__BMI__
CFLAGS.pool.c= -fno-tree-vectorize -U__BMI__
CFLAGS.xxhash.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_common.c= -fno-tree-vectorize -U__BMI__
CFLAGS.fse_compress.c= -fno-tree-vectorize -U__BMI__
CFLAGS.hist.c= -fno-tree-vectorize -U__BMI__
CFLAGS.huf_compress.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_compress.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_compress_literals.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_compress_sequences.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_compress_superblock.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_double_fast.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_fast.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_lazy.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_ldm.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_opt.c= -fno-tree-vectorize -U__BMI__
CFLAGS.huf_decompress.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_ddict.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_decompress.c= -fno-tree-vectorize -U__BMI__
CFLAGS.zstd_decompress_block.c= -fno-tree-vectorize -U__BMI__
CFLAGS.entropy_common.c=-U__BMI__
CFLAGS.error_private.c= -U__BMI__
CFLAGS.fse_decompress.c= -U__BMI__
CFLAGS.pool.c= -U__BMI__
CFLAGS.xxhash.c= -U__BMI__
CFLAGS.zstd_common.c= -U__BMI__
CFLAGS.fse_compress.c= -U__BMI__
CFLAGS.hist.c= -U__BMI__
CFLAGS.huf_compress.c= -U__BMI__
CFLAGS.zstd_compress.c= -U__BMI__
CFLAGS.zstd_compress_literals.c= -U__BMI__
CFLAGS.zstd_compress_sequences.c= -U__BMI__
CFLAGS.zstd_compress_superblock.c= -U__BMI__
CFLAGS.zstd_double_fast.c= -U__BMI__
CFLAGS.zstd_fast.c= -U__BMI__
CFLAGS.zstd_lazy.c= -U__BMI__
CFLAGS.zstd_ldm.c= -U__BMI__
CFLAGS.zstd_opt.c= -U__BMI__
CFLAGS.huf_decompress.c= -U__BMI__
CFLAGS.zstd_ddict.c= -U__BMI__
CFLAGS.zstd_decompress.c= -U__BMI__
CFLAGS.zstd_decompress_block.c= -U__BMI__

# Woe betide ye who override this, for yours is the kingdom of mysterious segfaults
# absolutely everywhere.
#
# Free warning to anyone considering it - -march=native or the like later in the line
# will undo -mgeneral-regs-only, and gcc's -O2 starting in 12 does autovectorizing.
#
# Good luck.
CFLAGS += -mgeneral-regs-only -fno-tree-vectorize
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure how far back FreeBSD's compiler support goes, but the same remark about -mgeneral-regs-only being a potential problem applies here.

4 changes: 0 additions & 4 deletions module/zcommon/zfs_fletcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -300,21 +300,18 @@ fletcher_2_byteswap(const void *buf, uint64_t size,
(void) fletcher_2_incremental_byteswap((void *) buf, size, zcp);
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_scalar_init(fletcher_4_ctx_t *ctx)
{
ZIO_SET_CHECKSUM(&ctx->scalar, 0, 0, 0, 0);
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_scalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
{
memcpy(zcp, &ctx->scalar, sizeof (zio_cksum_t));
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_scalar_native(fletcher_4_ctx_t *ctx, const void *buf,
uint64_t size)
Expand All @@ -338,7 +335,6 @@ fletcher_4_scalar_native(fletcher_4_ctx_t *ctx, const void *buf,
ZIO_SET_CHECKSUM(&ctx->scalar, a, b, c, d);
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_scalar_byteswap(fletcher_4_ctx_t *ctx, const void *buf,
uint64_t size)
Expand Down
3 changes: 1 addition & 2 deletions module/zcommon/zfs_fletcher_aarch64_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,14 @@
#include <sys/spa_checksum.h>
#include <sys/string.h>
#include <zfs_fletcher.h>
#include <sys/zfs_context.h>

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_aarch64_neon_init(fletcher_4_ctx_t *ctx)
{
memset(ctx->aarch64_neon, 0, 4 * sizeof (zfs_fletcher_aarch64_neon_t));
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_aarch64_neon_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
{
Expand Down
3 changes: 1 addition & 2 deletions module/zcommon/zfs_fletcher_avx512.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,18 @@
#include <sys/string.h>
#include <sys/simd.h>
#include <zfs_fletcher.h>
#include <sys/zfs_context.h>

#ifdef __linux__
#define __asm __asm__ __volatile__
#endif

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_avx512f_init(fletcher_4_ctx_t *ctx)
{
memset(ctx->avx512, 0, 4 * sizeof (zfs_fletcher_avx512_t));
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_avx512f_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
{
Expand Down
3 changes: 1 addition & 2 deletions module/zcommon/zfs_fletcher_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,14 @@
#include <sys/string.h>
#include <sys/simd.h>
#include <zfs_fletcher.h>
#include <sys/zfs_context.h>

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_avx2_init(fletcher_4_ctx_t *ctx)
{
memset(ctx->avx, 0, 4 * sizeof (zfs_fletcher_avx_t));
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_avx2_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
{
Expand Down
3 changes: 1 addition & 2 deletions module/zcommon/zfs_fletcher_sse.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,14 @@
#include <sys/string.h>
#include <sys/byteorder.h>
#include <zfs_fletcher.h>
#include <sys/zfs_context.h>

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_sse2_init(fletcher_4_ctx_t *ctx)
{
memset(ctx->sse, 0, 4 * sizeof (zfs_fletcher_sse_t));
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_sse2_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
{
Expand Down
11 changes: 7 additions & 4 deletions module/zcommon/zfs_fletcher_superscalar.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,20 @@
#include <sys/spa_checksum.h>
#include <sys/string.h>
#include <zfs_fletcher.h>
#include <sys/zfs_context.h>

/*
* These functions are all annotated to forbid auto-vectorization, because
* we would like some implementations that work even if SIMD is on the fritz,
* and auto-vectorizing random code can lead to strange side effects...
*/

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_superscalar_init(fletcher_4_ctx_t *ctx)
{
memset(ctx->superscalar, 0, 4 * sizeof (zfs_fletcher_superscalar_t));
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_superscalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
{
Expand All @@ -70,7 +75,6 @@ fletcher_4_superscalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
ZIO_SET_CHECKSUM(zcp, A, B, C, D);
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_superscalar_native(fletcher_4_ctx_t *ctx,
const void *buf, uint64_t size)
Expand Down Expand Up @@ -110,7 +114,6 @@ fletcher_4_superscalar_native(fletcher_4_ctx_t *ctx,
ctx->superscalar[3].v[1] = d2;
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_superscalar_byteswap(fletcher_4_ctx_t *ctx,
const void *buf, uint64_t size)
Expand Down
11 changes: 7 additions & 4 deletions module/zcommon/zfs_fletcher_superscalar4.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,20 @@
#include <sys/spa_checksum.h>
#include <sys/string.h>
#include <zfs_fletcher.h>
#include <sys/zfs_context.h>

/*
* These functions are all annotated to forbid auto-vectorization, because
* we would like some implementations that work even if SIMD is on the fritz,
* and auto-vectorizing random code can lead to strange side effects...
*/

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_superscalar4_init(fletcher_4_ctx_t *ctx)
{
memset(ctx->superscalar, 0, 4 * sizeof (zfs_fletcher_superscalar_t));
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_superscalar4_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
{
Expand Down Expand Up @@ -84,7 +89,6 @@ fletcher_4_superscalar4_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
ZIO_SET_CHECKSUM(zcp, A, B, C, D);
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_superscalar4_native(fletcher_4_ctx_t *ctx,
const void *buf, uint64_t size)
Expand Down Expand Up @@ -150,7 +154,6 @@ fletcher_4_superscalar4_native(fletcher_4_ctx_t *ctx,
ctx->superscalar[3].v[3] = d4;
}

ZFS_NO_SANITIZE_UNDEFINED
static void
fletcher_4_superscalar4_byteswap(fletcher_4_ctx_t *ctx,
const void *buf, uint64_t size)
Expand Down