From 21b62c1439f4063ec4386d2c4068a27d6df9bafc Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Wed, 23 Mar 2022 19:24:14 +0100 Subject: [PATCH 1/4] s2: Simplify asm code --- s2/_generate/gen.go | 15 +- s2/encodeblock_amd64.go | 2 + s2/encodeblock_amd64.s | 352 ++-------------------------------------- 3 files changed, 19 insertions(+), 350 deletions(-) diff --git a/s2/_generate/gen.go b/s2/_generate/gen.go index 9ea8eeded0..490b9bdf80 100644 --- a/s2/_generate/gen.go +++ b/s2/_generate/gen.go @@ -36,6 +36,13 @@ func main() { Constraint(buildtags.Term("gc").ToConstraint()) Constraint(buildtags.Not("noasm").ToConstraint()) + TEXT("_dummy_", 0, "func()") + Comment("#ifdef GOAMD64_v4") + Comment("#undef GOAMD64_v3") + Comment("#define GOAMD64_v3") + Comment("#endif") + RET() + o := options{ bmi1: false, bmi2: false, @@ -2507,14 +2514,6 @@ func (o options) matchLen(name string, a, b, len reg.GPVirtual, end LabelRef) re // 2016 BMI :TZCNT r64, r64 L: 0.57ns= 2.0c T: 0.29ns= 1.00c // 315 AMD64 :BSF r64, r64 L: 0.88ns= 3.1c T: 0.86ns= 3.00c TZCNTQ(tmp, tmp) - Comment("#define TZCNTQ_EMITTED 1") - Comment("#endif\n") - Comment("#ifdef GOAMD64_v4") - TZCNTQ(tmp, tmp) - Comment("#define TZCNTQ_EMITTED 1") - Comment("#endif\n") - Comment("#ifdef TZCNTQ_EMITTED") - Comment("#undef TZCNTQ_EMITTED") Comment("#else") BSFQ(tmp, tmp) Comment("#endif") diff --git a/s2/encodeblock_amd64.go b/s2/encodeblock_amd64.go index d9312e5b92..88f27c0990 100644 --- a/s2/encodeblock_amd64.go +++ b/s2/encodeblock_amd64.go @@ -5,6 +5,8 @@ package s2 +func _dummy_() + // encodeBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. diff --git a/s2/encodeblock_amd64.s b/s2/encodeblock_amd64.s index 729dbf536c..3d51d95401 100644 --- a/s2/encodeblock_amd64.s +++ b/s2/encodeblock_amd64.s @@ -1,10 +1,18 @@ // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT. -//go:build !appengine && !noasm && gc && !noasm +// go:build !appengine && !noasm && gc && !noasm // +build !appengine,!noasm,gc,!noasm #include "textflag.h" +// func _dummy_() +TEXT ·_dummy_(SB), $0 +#ifdef GOAMD64_v4 +#undef GOAMD64_v3 +#define GOAMD64_v3 +#endif + RET + // func encodeBlockAsm(dst []byte, src []byte) int // Requires: BMI, SSE2 TEXT ·encodeBlockAsm(SB), $65560-56 @@ -253,17 +261,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -791,17 +788,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -1466,17 +1452,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -1963,17 +1938,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -2495,6 +2459,7 @@ memmove_repeat_emit_encodeBlockAsm12B: JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 + JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: @@ -2597,17 +2562,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -2979,17 +2933,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -3510,17 +3453,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -3892,17 +3824,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -4423,17 +4344,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -4795,17 +4705,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -5225,17 +5124,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -6214,17 +6102,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -7138,17 +7015,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -7916,17 +7782,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -8694,17 +8549,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -9584,17 +9428,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R10, R10 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -9918,17 +9751,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -10448,17 +10270,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R10, R10 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -10739,17 +10550,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -11226,17 +11026,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R10, R10 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -11517,17 +11306,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -12004,17 +11782,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R10, R10 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -12295,17 +12062,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -12782,17 +12538,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R10, R10 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -13071,17 +12816,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -13461,17 +13195,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -14068,17 +13791,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -14604,17 +14316,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -15140,17 +14841,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -15676,17 +15366,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -16644,17 +16323,6 @@ matchlen_loopback_standalone: #ifdef GOAMD64_v3 TZCNTQ BX, BX -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ BX, BX - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ BX, BX From f6311d5841118ec653e17969d126aa0ad89fafd1 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Thu, 24 Mar 2022 11:04:06 +0100 Subject: [PATCH 2/4] Check if defined before undeffing --- s2/_generate/gen.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/s2/_generate/gen.go b/s2/_generate/gen.go index 490b9bdf80..90fbeea5f1 100644 --- a/s2/_generate/gen.go +++ b/s2/_generate/gen.go @@ -38,9 +38,10 @@ func main() { TEXT("_dummy_", 0, "func()") Comment("#ifdef GOAMD64_v4") - Comment("#undef GOAMD64_v3") + Comment("#ifndef GOAMD64_v3") Comment("#define GOAMD64_v3") Comment("#endif") + Comment("#endif") RET() o := options{ From fed5f67365bac0039576e5de3cc40fb827e75296 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Thu, 24 Mar 2022 17:03:51 +0100 Subject: [PATCH 3/4] Regen code --- s2/encodeblock_amd64.s | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/s2/encodeblock_amd64.s b/s2/encodeblock_amd64.s index 3d51d95401..b5a057d972 100644 --- a/s2/encodeblock_amd64.s +++ b/s2/encodeblock_amd64.s @@ -1,6 +1,6 @@ // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT. -// go:build !appengine && !noasm && gc && !noasm +//go:build !appengine && !noasm && gc && !noasm // +build !appengine,!noasm,gc,!noasm #include "textflag.h" @@ -8,8 +8,9 @@ // func _dummy_() TEXT ·_dummy_(SB), $0 #ifdef GOAMD64_v4 -#undef GOAMD64_v3 +#ifndef GOAMD64_v3 #define GOAMD64_v3 +#endif #endif RET @@ -2459,7 +2460,6 @@ memmove_repeat_emit_encodeBlockAsm12B: JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 - JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: From b5de463e7a1b5f49c27899c0bafaffbce146b27d Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Fri, 25 Mar 2022 10:16:28 +0100 Subject: [PATCH 4/4] Comment for CI check --- s2/_generate/gen.go | 1 + 1 file changed, 1 insertion(+) diff --git a/s2/_generate/gen.go b/s2/_generate/gen.go index 90fbeea5f1..8d07364407 100644 --- a/s2/_generate/gen.go +++ b/s2/_generate/gen.go @@ -36,6 +36,7 @@ func main() { Constraint(buildtags.Term("gc").ToConstraint()) Constraint(buildtags.Not("noasm").ToConstraint()) + // We need a function to add comments. TEXT("_dummy_", 0, "func()") Comment("#ifdef GOAMD64_v4") Comment("#ifndef GOAMD64_v3")