diff --git a/s2/_generate/gen.go b/s2/_generate/gen.go index 56c5536059..d5ff78930b 100644 --- a/s2/_generate/gen.go +++ b/s2/_generate/gen.go @@ -418,18 +418,37 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m } Label("repeat_extend_back_end_" + name) + { + // tmp = s-nextEmit + tmp := GP64() + MOVL(base.As32(), tmp.As32()) + SUBL(nextEmitL, tmp.As32()) + // tmp = &dst + s-nextEmit + LEAQ(Mem{Base: dst, Index: tmp, Scale: 1, Disp: literalMaxOverhead}, tmp) + CMPQ(tmp, dstLimitPtrQ) + JB(LabelRef("repeat_dst_size_check_" + name)) + ri, err := ReturnIndex(0).Resolve() + if err != nil { + panic(err) + } + MOVQ(U32(0), ri.Addr) + if o.avx2 { + VZEROUPPER() + } + RET() + } + Label("repeat_dst_size_check_" + name) + // Base is now at start. Emit until base. // d += emitLiteral(dst[d:], src[nextEmit:base]) - if true { - o.emitLiteralsDstP(nextEmitL, base, src, dst, "repeat_emit_"+name) - } + o.emitLiteralsDstP(nextEmitL, base, src, dst, "repeat_emit_"+name) // Extend forward { // s += 4 + checkRep ADDL(U8(4+checkRep), s) - if true { + { // candidate := s - repeat + 4 + checkRep MOVL(s, candidate) SUBL(repeatL, candidate) // candidate = s - repeat @@ -458,7 +477,7 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m } } // Emit - if true { + { // length = s-base length := GP32() MOVL(s, length) @@ -602,7 +621,7 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m Label("match_extend_back_end_" + name) // Bail if we exceed the maximum size. - if true { + { // tmp = s-nextEmit tmp := GP64() MOVL(s, tmp.As32()) diff --git a/s2/encode_all.go b/s2/encode_all.go index 5e57995d48..9977045696 100644 --- a/s2/encode_all.go +++ b/s2/encode_all.go @@ -117,6 +117,12 @@ func encodeBlockGo(dst, src []byte) (d int) { i-- base-- } + + // Bail if we exceed the maximum size. + if d+(base-nextEmit) > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:base]) // Extend forward @@ -152,7 +158,6 @@ func encodeBlockGo(dst, src []byte) (d int) { if s >= sLimit { goto emitRemainder } - cv = load64(src, s) continue } @@ -325,6 +330,11 @@ func encodeBlockSnappyGo(dst, src []byte) (d int) { i-- base-- } + // Bail if we exceed the maximum size. + if d+(base-nextEmit) > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:base]) // Extend forward @@ -532,6 +542,11 @@ searchDict: i-- base-- } + // Bail if we exceed the maximum size. + if d+(base-nextEmit) > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:base]) if debug && nextEmit != base { fmt.Println("emitted ", base-nextEmit, "literals") @@ -880,6 +895,11 @@ searchDict: i-- base-- } + // Bail if we exceed the maximum size. + if d+(base-nextEmit) > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:base]) if debug && nextEmit != base { fmt.Println("emitted ", base-nextEmit, "literals") diff --git a/s2/encodeblock_amd64.s b/s2/encodeblock_amd64.s index 5f110d1940..2ff5b33401 100644 --- a/s2/encodeblock_amd64.s +++ b/s2/encodeblock_amd64.s @@ -100,6 +100,15 @@ repeat_extend_back_loop_encodeBlockAsm: JNZ repeat_extend_back_loop_encodeBlockAsm repeat_extend_back_end_encodeBlockAsm: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 5(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_encodeBlockAsm + MOVQ $0x00000000, ret+48(FP) + RET + +repeat_dst_size_check_encodeBlockAsm: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm @@ -1513,6 +1522,15 @@ repeat_extend_back_loop_encodeBlockAsm4MB: JNZ repeat_extend_back_loop_encodeBlockAsm4MB repeat_extend_back_end_encodeBlockAsm4MB: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 4(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_encodeBlockAsm4MB + MOVQ $0x00000000, ret+48(FP) + RET + +repeat_dst_size_check_encodeBlockAsm4MB: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB @@ -2828,6 +2846,15 @@ repeat_extend_back_loop_encodeBlockAsm12B: JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 3(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_encodeBlockAsm12B + MOVQ $0x00000000, ret+48(FP) + RET + +repeat_dst_size_check_encodeBlockAsm12B: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B @@ -3903,6 +3930,15 @@ repeat_extend_back_loop_encodeBlockAsm10B: JNZ repeat_extend_back_loop_encodeBlockAsm10B repeat_extend_back_end_encodeBlockAsm10B: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 3(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_encodeBlockAsm10B + MOVQ $0x00000000, ret+48(FP) + RET + +repeat_dst_size_check_encodeBlockAsm10B: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B @@ -4978,6 +5014,15 @@ repeat_extend_back_loop_encodeBlockAsm8B: JNZ repeat_extend_back_loop_encodeBlockAsm8B repeat_extend_back_end_encodeBlockAsm8B: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 3(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_encodeBlockAsm8B + MOVQ $0x00000000, ret+48(FP) + RET + +repeat_dst_size_check_encodeBlockAsm8B: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B @@ -10756,6 +10801,15 @@ repeat_extend_back_loop_encodeSnappyBlockAsm: JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 5(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_encodeSnappyBlockAsm + MOVQ $0x00000000, ret+48(FP) + RET + +repeat_dst_size_check_encodeSnappyBlockAsm: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm @@ -11678,6 +11732,15 @@ repeat_extend_back_loop_encodeSnappyBlockAsm64K: JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K repeat_extend_back_end_encodeSnappyBlockAsm64K: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 3(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_encodeSnappyBlockAsm64K + MOVQ $0x00000000, ret+48(FP) + RET + +repeat_dst_size_check_encodeSnappyBlockAsm64K: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K @@ -12504,6 +12567,15 @@ repeat_extend_back_loop_encodeSnappyBlockAsm12B: JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B repeat_extend_back_end_encodeSnappyBlockAsm12B: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 3(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_encodeSnappyBlockAsm12B + MOVQ $0x00000000, ret+48(FP) + RET + +repeat_dst_size_check_encodeSnappyBlockAsm12B: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B @@ -13330,6 +13402,15 @@ repeat_extend_back_loop_encodeSnappyBlockAsm10B: JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B repeat_extend_back_end_encodeSnappyBlockAsm10B: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 3(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_encodeSnappyBlockAsm10B + MOVQ $0x00000000, ret+48(FP) + RET + +repeat_dst_size_check_encodeSnappyBlockAsm10B: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B @@ -14156,6 +14237,15 @@ repeat_extend_back_loop_encodeSnappyBlockAsm8B: JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B repeat_extend_back_end_encodeSnappyBlockAsm8B: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 3(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_encodeSnappyBlockAsm8B + MOVQ $0x00000000, ret+48(FP) + RET + +repeat_dst_size_check_encodeSnappyBlockAsm8B: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B @@ -17949,6 +18039,15 @@ repeat_extend_back_loop_calcBlockSize: JNZ repeat_extend_back_loop_calcBlockSize repeat_extend_back_end_calcBlockSize: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 5(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_calcBlockSize + MOVQ $0x00000000, ret+24(FP) + RET + +repeat_dst_size_check_calcBlockSize: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_calcBlockSize @@ -18531,6 +18630,15 @@ repeat_extend_back_loop_calcBlockSizeSmall: JNZ repeat_extend_back_loop_calcBlockSizeSmall repeat_extend_back_end_calcBlockSizeSmall: + MOVL SI, BX + SUBL 12(SP), BX + LEAQ 3(AX)(BX*1), BX + CMPQ BX, (SP) + JB repeat_dst_size_check_calcBlockSizeSmall + MOVQ $0x00000000, ret+24(FP) + RET + +repeat_dst_size_check_calcBlockSizeSmall: MOVL 12(SP), BX CMPL BX, SI JEQ emit_literal_done_repeat_emit_calcBlockSizeSmall