-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlookup_subrange_amd64.s
232 lines (206 loc) · 8.58 KB
/
lookup_subrange_amd64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
// LCDATA1 is a 24-byte file-local constant pool read by ·_lookup_subrange_avx
// through BP:
//   bytes  0..15  sixteen 0x80 bytes; loaded as one 128-bit value (vmovdqa) and
//                 XORed with each 16-byte input block before the signed byte compare.
//   bytes 16..23  0x80 in the lowest byte, zero elsewhere; the dword at offset 16
//                 is broadcast (vpbroadcastd) and XORed with a broadcast bound.
// NOTE(review): the vmovdqa load at offset 0 needs this symbol to be 16-byte
// aligned; nothing in this file requests that explicitly - confirm the linker's
// placement.
DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080
DATA LCDATA1<>+0x010(SB)/8, $0x0000000000000080
// Flag value 8 is RODATA in Go's textflag.h; total size is 24 bytes.
GLOBL LCDATA1<>(SB), 8, $24
// ·_lookup_subrange_avx(input, output, range_upper, table, subranges, info)
//
// For every input byte, counts how many of the `subranges` bound bytes at
// range_upper the input byte is greater than, and stores table[count] at the
// same index of output.
//
// Arguments (six 8-byte words, 48 bytes, no results):
//   input+0(FP)        -> DI   source bytes
//   output+8(FP)       -> SI   destination bytes
//   range_upper+16(FP) -> DX   bound bytes, one per subrange
//   table+24(FP)       -> CX   bytes indexed by the computed count
//   subranges+32(FP)   -> R8   number of bounds; only the low 32 bits are read,
//                              as a signed value. If <= 0, every output byte
//                              is table[0].
//   info+40(FP)        -> R9   low 32 bits: number of bytes to process;
//                              high 32 bits: the 16-bytes-at-a-time vector path
//                              runs only when this half equals 1 (and the
//                              length is >= 16), otherwise everything goes
//                              through the scalar loop.
//                              NOTE(review): presumably a "SIMD allowed" flag -
//                              confirm against the Go caller.
//
// Structure: an optional vector loop over whole 16-byte blocks, followed by a
// scalar loop over whatever bytes remain (starting at length & -16).
//
// Vector-path requirements visible in the code: 16 bytes are loaded from table
// (vmovdqu [rcx]) and the count is used as a vpshufb selector, so table must
// have 16 readable bytes there.
//
// NOTE(review): the vector path XORs both input and bound with 0x80 before the
// signed vpcmpgtb (i.e. an unsigned comparison), whereas the scalar loop
// zero-extends the input byte (movzx) but sign-extends the bound (movsx). The
// two paths therefore disagree for bounds >= 0x80 - confirm whether such bounds
// can occur.
//
// Clobbers: AX, BX, DX, R8, R10-R15, X0-X5, flags. BP is used as the base of
// LCDATA1<>.
//
// Frame size is $8 rather than $0: with a zero-size frame the Go assembler
// emits no prologue, so the LEAQ into BP below would return to the caller with
// its frame pointer destroyed (reported by `go vet`'s framepointer check). A
// non-zero frame makes the assembler save BP on entry and restore it before
// RET. The encoded instructions never touch SP, so the extra frame is
// otherwise unused. This file is generated by c2goasm; re-apply this change if
// it is regenerated.
TEXT ·_lookup_subrange_avx(SB), $8-48
MOVQ input+0(FP), DI
MOVQ output+8(FP), SI
MOVQ range_upper+16(FP), DX
MOVQ table+24(FP), CX
MOVQ subranges+32(FP), R8
MOVQ info+40(FP), R9
// BP = base address of the constant pool (caller's BP is preserved by the
// assembler-generated prologue/epilogue thanks to the non-zero frame).
LEAQ LCDATA1<>(SB), BP
// ebx = high half of info; eax = 0 = index of the first byte still to process.
WORD $0x894c; BYTE $0xcb // mov rbx, r9
LONG $0x20ebc148 // shr rbx, 32
WORD $0xc031 // xor eax, eax
// High half != 1: skip the vector path, process everything in the scalar loop.
WORD $0xfb83; BYTE $0x01 // cmp ebx, 1
JNE LBB0_15
// At least one whole 16-byte block (unsigned compare): take the vector path.
LONG $0x10f98341 // cmp r9d, 16
JAE LBB0_2
// LBB0_14: vector part finished (or skipped because length < 16).
// eax = length rounded down to a multiple of 16 = start of the scalar tail.
LBB0_14:
WORD $0x8944; BYTE $0xc8 // mov eax, r9d
WORD $0xe083; BYTE $0xf0 // and eax, -16
// LBB0_15: scalar tail entry; return if no bytes remain.
LBB0_15:
WORD $0x3944; BYTE $0xc8 // cmp eax, r9d
JAE LBB0_30
// r10 = current byte index.
WORD $0x634c; BYTE $0xd0 // movsxd r10, eax
// No bounds (subranges <= 0, signed): fill the tail with table[0].
WORD $0x8545; BYTE $0xc0 // test r8d, r8d
JLE LBB0_17
// Scalar loop setup: r8 = subranges - 1, r14 = subranges & 3 (leftover bounds),
// r11 = subranges & -4 (bounds handled by the 4x unrolled loop).
WORD $0x8945; BYTE $0xc3 // mov r11d, r8d
LONG $0xff438d4d // lea r8, [r11 - 1]
WORD $0x8945; BYTE $0xde // mov r14d, r11d
LONG $0x03e68341 // and r14d, 3
LONG $0xfce38341 // and r11d, -4
JMP LBB0_24
// LBB0_29: r13b holds the count for this byte; output[r10] = table[count],
// then advance to the next byte.
LBB0_29:
LONG $0xc5b60f41 // movzx eax, r13b
WORD $0x048a; BYTE $0x01 // mov al, byte [rcx + rax]
LONG $0x16048842 // mov byte [rsi + r10], al
LONG $0x01c28349 // add r10, 1
WORD $0x3945; BYTE $0xca // cmp r10d, r9d
JAE LBB0_30
// LBB0_24: per-byte start. r15d = input byte (zero-extended), r12 = bound
// index = 0, r13 = count = 0. Skip the unrolled loop when fewer than 4 bounds.
LBB0_24:
LONG $0x3cb60f46; BYTE $0x17 // movzx r15d, byte [rdi + r10]
WORD $0x3145; BYTE $0xe4 // xor r12d, r12d
WORD $0x3145; BYTE $0xed // xor r13d, r13d
LONG $0x03f88349 // cmp r8, 3
JB LBB0_26
// LBB0_25: four bounds per iteration; each setg adds 1 to the running count
// when input > bound (bound sign-extended).
LBB0_25:
LONG $0x04be0f42; BYTE $0x22 // movsx eax, byte [rdx + r12]
WORD $0x3941; BYTE $0xc7 // cmp r15d, eax
WORD $0x9f0f; BYTE $0xc0 // setg al
WORD $0x0044; BYTE $0xe8 // add al, r13b
LONG $0x5cbe0f42; WORD $0x0122 // movsx ebx, byte [rdx + r12 + 1]
WORD $0x3941; BYTE $0xdf // cmp r15d, ebx
WORD $0x9f0f; BYTE $0xc3 // setg bl
WORD $0xc300 // add bl, al
LONG $0x44be0f42; WORD $0x0222 // movsx eax, byte [rdx + r12 + 2]
WORD $0x3941; BYTE $0xc7 // cmp r15d, eax
WORD $0x9f0f; BYTE $0xc0 // setg al
WORD $0xd800 // add al, bl
LONG $0x5cbe0f42; WORD $0x0322 // movsx ebx, byte [rdx + r12 + 3]
WORD $0x3941; BYTE $0xdf // cmp r15d, ebx
LONG $0xc59f0f41 // setg r13b
WORD $0x0041; BYTE $0xc5 // add r13b, al
LONG $0x04c48349 // add r12, 4
WORD $0x394d; BYTE $0xe3 // cmp r11, r12
JNE LBB0_25
// LBB0_26: handle the remaining (subranges & 3) bounds one at a time.
LBB0_26:
WORD $0x854d; BYTE $0xf6 // test r14, r14
JE LBB0_29
// r12 becomes a pointer to the first leftover bound; rax indexes from it.
WORD $0x0149; BYTE $0xd4 // add r12, rdx
WORD $0xc031 // xor eax, eax
LBB0_28:
LONG $0x1cbe0f41; BYTE $0x04 // movsx ebx, byte [r12 + rax]
WORD $0x3941; BYTE $0xdf // cmp r15d, ebx
WORD $0x9f0f; BYTE $0xc3 // setg bl
WORD $0x0041; BYTE $0xdd // add r13b, bl
LONG $0x01c08348 // add rax, 1
WORD $0x3949; BYTE $0xc6 // cmp r14, rax
JNE LBB0_28
JMP LBB0_29
// LBB0_2: vector path. r10d = number of 16-byte blocks = max(1, length >> 4).
LBB0_2:
WORD $0x8944; BYTE $0xc8 // mov eax, r9d
WORD $0xe8c1; BYTE $0x04 // shr eax, 4
WORD $0xf883; BYTE $0x02 // cmp eax, 2
LONG $0x0001ba41; WORD $0x0000 // mov r10d, 1
LONG $0xd0430f44 // cmovae r10d, eax
// No bounds: broadcast table[0] over all whole blocks instead.
WORD $0x8545; BYTE $0xc0 // test r8d, r8d
JLE LBB0_3
// r11d = subranges, eax = subranges & -2 (bounds handled in pairs),
// r15 = block index. xmm0 = 16 x 0x80 bias, xmm1 = dword 0x80 broadcast,
// xmm2 = 0x80 in the lowest byte only.
WORD $0x8945; BYTE $0xc3 // mov r11d, r8d
WORD $0x8944; BYTE $0xd8 // mov eax, r11d
WORD $0xe083; BYTE $0xfe // and eax, -2
WORD $0x3145; BYTE $0xff // xor r15d, r15d
LONG $0x456ff9c5; BYTE $0x00 // vmovdqa xmm0, oword 0[rbp] /* [rip + LCPI0_0] */
LONG $0x5879e2c4; WORD $0x104d // vpbroadcastd xmm1, dword 16[rbp] /* [rip + LCPI0_1] */
LONG $0x000080bb; BYTE $0x00 // mov ebx, 128
LONG $0xd36ef9c5 // vmovd xmm2, ebx
JMP LBB0_9
// LBB0_13: xmm4 holds the 16 per-byte counts; use them as vpshufb selectors
// into the 16 bytes loaded from table and store the block to output.
LBB0_13:
LONG $0x196ffac5 // vmovdqu xmm3, oword [rcx]
LONG $0x0061e2c4; BYTE $0xdc // vpshufb xmm3, xmm3, xmm4
LONG $0x7f7aa1c4; WORD $0x361c // vmovdqu oword [rsi + r14], xmm3
LONG $0x01c78349 // add r15, 1
WORD $0x394d; BYTE $0xd7 // cmp r15, r10
JE LBB0_14
// LBB0_9: per-block start. r14 = byte offset of the block; xmm3 = input block
// XOR 0x80 (so the signed compare below orders bytes as unsigned);
// xmm4 = counts = 0; rbx = bound index. A single bound skips the pair loop.
LBB0_9:
WORD $0x894d; BYTE $0xfe // mov r14, r15
LONG $0x04e6c149 // shl r14, 4
LONG $0xef79a1c4; WORD $0x371c // vpxor xmm3, xmm0, oword [rdi + r14]
LONG $0xe4efd9c5 // vpxor xmm4, xmm4, xmm4
WORD $0xdb31 // xor ebx, ebx
LONG $0x01f88341 // cmp r8d, 1
JE LBB0_11
// LBB0_10: two bounds per iteration. Each bound is broadcast, its byte 0 is
// XORed with 0x80 and re-broadcast; vpcmpgtb yields -1 where input > bound,
// and subtracting that mask increments the matching counts.
LBB0_10:
LONG $0x7879e2c4; WORD $0x1a2c // vpbroadcastb xmm5, byte [rdx + rbx]
LONG $0xeaefd1c5 // vpxor xmm5, xmm5, xmm2
LONG $0x7879e2c4; BYTE $0xed // vpbroadcastb xmm5, xmm5
LONG $0xed64e1c5 // vpcmpgtb xmm5, xmm3, xmm5
LONG $0xe5f8d9c5 // vpsubb xmm4, xmm4, xmm5
LONG $0x7879e2c4; WORD $0x1a6c; BYTE $0x01 // vpbroadcastb xmm5, byte [rdx + rbx + 1]
LONG $0xeaefd1c5 // vpxor xmm5, xmm5, xmm2
LONG $0x7879e2c4; BYTE $0xed // vpbroadcastb xmm5, xmm5
LONG $0xed64e1c5 // vpcmpgtb xmm5, xmm3, xmm5
LONG $0xe5f8d9c5 // vpsubb xmm4, xmm4, xmm5
LONG $0x02c38348 // add rbx, 2
WORD $0x3948; BYTE $0xd8 // cmp rax, rbx
JNE LBB0_10
// LBB0_11: odd number of bounds - process the last one (xmm3 is consumed here
// and reloaded from table at LBB0_13).
LBB0_11:
LONG $0x01c3f641 // test r11b, 1
JE LBB0_13
LONG $0x7879e2c4; WORD $0x1a2c // vpbroadcastb xmm5, byte [rdx + rbx]
LONG $0xe9efd1c5 // vpxor xmm5, xmm5, xmm1
LONG $0x7879e2c4; BYTE $0xed // vpbroadcastb xmm5, xmm5
LONG $0xdd64e1c5 // vpcmpgtb xmm3, xmm3, xmm5
LONG $0xe3f8d9c5 // vpsubb xmm4, xmm4, xmm3
JMP LBB0_13
// LBB0_17: scalar tail with no bounds - every remaining output byte is
// table[0]. eax = remaining - 1; edx = length & 3 bytes are written singly
// first, then the rest four at a time.
LBB0_17:
WORD $0xd0f7 // not eax
WORD $0x0144; BYTE $0xc8 // add eax, r9d
WORD $0x8944; BYTE $0xca // mov edx, r9d
WORD $0xe283; BYTE $0x03 // and edx, 3
JE LBB0_19
LBB0_18:
WORD $0xb60f; BYTE $0x19 // movzx ebx, byte [rcx]
LONG $0x161c8842 // mov byte [rsi + r10], bl
LONG $0x01c28349 // add r10, 1
WORD $0xc283; BYTE $0xff // add edx, -1
JNE LBB0_18
// Fewer than 4 bytes were pending in total: the single-byte loop covered them.
LBB0_19:
WORD $0xf883; BYTE $0x03 // cmp eax, 3
JB LBB0_30
LBB0_20:
WORD $0xb60f; BYTE $0x01 // movzx eax, byte [rcx]
LONG $0x16048842 // mov byte [rsi + r10], al
WORD $0xb60f; BYTE $0x01 // movzx eax, byte [rcx]
LONG $0x16448842; BYTE $0x01 // mov byte [rsi + r10 + 1], al
WORD $0xb60f; BYTE $0x01 // movzx eax, byte [rcx]
LONG $0x16448842; BYTE $0x02 // mov byte [rsi + r10 + 2], al
WORD $0xb60f; BYTE $0x01 // movzx eax, byte [rcx]
LONG $0x16448842; BYTE $0x03 // mov byte [rsi + r10 + 3], al
LONG $0x04c28349 // add r10, 4
WORD $0x3945; BYTE $0xca // cmp r10d, r9d
JB LBB0_20
// Single exit point; the assembler inserts the BP restore before this RET.
LBB0_30:
RET
// LBB0_3: vector path with no bounds - fill all r10d whole blocks with
// table[0] broadcast to 16 bytes. r11 = blocks & 3 (leftover blocks),
// rbx = block index; with 4 or more blocks use the 4x unrolled loop first.
LBB0_3:
LONG $0xff428d49 // lea rax, [r10 - 1]
WORD $0x8945; BYTE $0xd3 // mov r11d, r10d
LONG $0x03e38341 // and r11d, 3
LONG $0x03f88348 // cmp rax, 3
JAE LBB0_21
WORD $0xdb31 // xor ebx, ebx
JMP LBB0_5
// r10 = blocks & -4; rax = output + 48 so the four stores use offsets -48..0.
LBB0_21:
LONG $0xfce28341 // and r10d, -4
LONG $0x30468d48 // lea rax, [rsi + 48]
WORD $0xdb31 // xor ebx, ebx
LBB0_22:
LONG $0x7879e2c4; BYTE $0x01 // vpbroadcastb xmm0, byte [rcx]
LONG $0x407ffac5; BYTE $0xd0 // vmovdqu oword [rax - 48], xmm0
LONG $0x7879e2c4; BYTE $0x01 // vpbroadcastb xmm0, byte [rcx]
LONG $0x407ffac5; BYTE $0xe0 // vmovdqu oword [rax - 32], xmm0
LONG $0x7879e2c4; BYTE $0x01 // vpbroadcastb xmm0, byte [rcx]
LONG $0x407ffac5; BYTE $0xf0 // vmovdqu oword [rax - 16], xmm0
LONG $0x7879e2c4; BYTE $0x01 // vpbroadcastb xmm0, byte [rcx]
LONG $0x007ffac5 // vmovdqu oword [rax], xmm0
LONG $0x04c38348 // add rbx, 4
LONG $0x40c08348 // add rax, 64
WORD $0x3949; BYTE $0xda // cmp r10, rbx
JNE LBB0_22
// LBB0_5: leftover blocks. rbx = address of the first leftover block,
// r11 = leftover size in bytes, rax = byte offset within the leftover region.
LBB0_5:
WORD $0x854d; BYTE $0xdb // test r11, r11
JE LBB0_14
LONG $0x04e3c148 // shl rbx, 4
WORD $0x0148; BYTE $0xf3 // add rbx, rsi
LONG $0x04e3c149 // shl r11, 4
WORD $0xc031 // xor eax, eax
LBB0_7:
LONG $0x7879e2c4; BYTE $0x01 // vpbroadcastb xmm0, byte [rcx]
LONG $0x047ffac5; BYTE $0x03 // vmovdqu oword [rbx + rax], xmm0
LONG $0x10c08348 // add rax, 16
WORD $0x3949; BYTE $0xc3 // cmp r11, rax
JNE LBB0_7
// Whole blocks done; continue with the scalar tail.
JMP LBB0_14