Skip to content

Commit

Permalink
[skip ci] Review suggestion: copy in 32-byte chunks
Browse files Browse the repository at this point in the history
  • Loading branch information
WojciechMula committed Mar 18, 2022
1 parent cb83f0a commit 2cb26c5
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
17 changes: 11 additions & 6 deletions zstd/_generate/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ type executeSimple struct{}
//
// See copyMemory()
func (e executeSimple) copySize() int {
- return 16
+ return 32
}

func (e executeSimple) generateProcedure(name string) {
Expand Down Expand Up @@ -716,14 +716,19 @@ func (e executeSimple) generateProcedure(name string) {
func (e executeSimple) copyMemory(suffix string, src, dst, length reg.GPVirtual) {
label := "copy_" + suffix
ofs := GP64()
- s := Mem{Base: src, Index: ofs, Scale: 1}
- d := Mem{Base: dst, Index: ofs, Scale: 1}
+ s0 := Mem{Base: src, Index: ofs, Scale: 1}
+ s1 := Mem{Base: src, Index: ofs, Scale: 1, Disp: 16}
+ d0 := Mem{Base: dst, Index: ofs, Scale: 1}
+ d1 := Mem{Base: dst, Index: ofs, Scale: 1, Disp: 16}

XORQ(ofs, ofs)
Label(label)
- t := XMM()
- MOVUPS(s, t)
- MOVUPS(t, d)
+ t0 := XMM()
+ t1 := XMM()
+ MOVUPS(s0, t0)
+ MOVUPS(s1, t1)
+ MOVUPS(t0, d0)
+ MOVUPS(t1, d1)
ADDQ(U8(e.copySize()), ofs)
CMPQ(ofs, length)
JB(LabelRef(label))
Expand Down
10 changes: 7 additions & 3 deletions zstd/seqdec_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ main_loop:

// Check if we won't overflow ctx.out while fast copying
LEAQ (R10)(R11*1), R12
- LEAQ 16(R8)(R12*1), R13
+ LEAQ 32(R8)(R12*1), R13
CMPQ R13, SI
JA slow_path

Expand All @@ -634,8 +634,10 @@ main_loop:

copy_1:
  MOVUPS (DI)(R12*1), X0
+ MOVUPS 16(DI)(R12*1), X1
  MOVUPS X0, (BX)(R12*1)
- ADDQ $0x10, R12
+ MOVUPS X1, 16(BX)(R12*1)
+ ADDQ $0x20, R12
CMPQ R12, R11
JB copy_1
ADDQ R11, DI
Expand All @@ -658,8 +660,10 @@ copy_match:

copy_2:
  MOVUPS (R12)(R11*1), X0
+ MOVUPS 16(R12)(R11*1), X1
  MOVUPS X0, (BX)(R11*1)
- ADDQ $0x10, R11
+ MOVUPS X1, 16(BX)(R11*1)
+ ADDQ $0x20, R11
CMPQ R11, R10
JB copy_2
ADDQ R10, BX
Expand Down

0 comments on commit 2cb26c5

Please sign in to comment.