This repository has been archived by the owner on Aug 2, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
/
remainder_piby2f_forAsm.asm
180 lines (158 loc) · 5.73 KB
/
remainder_piby2f_forAsm.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
;
; MIT License
; -----------
;
; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
;
; Permission is hereby granted, free of charge, to any person obtaining a copy
; of this Software and associated documentation files (the "Software"), to deal
; in the Software without restriction, including without limitation the rights
; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
; copies of the Software, and to permit persons to whom the Software is
; furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included in
; all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
; THE SOFTWARE.
;
; An implementation of the remainder by pi/2 function
; This is a service routine for use by trig functions coded in asm
;
; On input,
; xmm0 = x; Note that we assume x >= pi/4
; On output
; xmm0 = r
; eax = region
; ---------------------------------------------------------------------------
; Read-only data and assembly-time definitions used by the routine below.
; ---------------------------------------------------------------------------
.const
ALIGN 16
; IEEE-754 double bit pattern used as the final multiplier (pi/2); the
; routine multiplies the fractional part of x * 2/pi by this value.
L__piby2 DQ 03ff921fb54442d18h
; Byte table defined in another module; the routine indexes it by byte offset
; and reads 24 consecutive bytes from it (presumably the bits of 2/pi, going
; by the name -- confirm against the defining module).
EXTRN __L_2_by_pi_bits:BYTE
; fname is a text macro that expands to the exported procedure name.
fname TEXTEQU <__remainder_piby2d2f_forAsm>
; No local stack space is requested.
stack_size EQU 000h
; NOTE(review): StackAllocate / StackDeallocate are not defined in this file;
; presumably they come from fm.inc -- confirm.
include fm.inc
.code
PUBLIC fname
; ---------------------------------------------------------------------------
; __remainder_piby2d2f_forAsm  (via the fname text macro)
;
; Reduces x by multiples of pi/2 using a multi-word integer multiply of the
; mantissa of x with 192 bits taken from the __L_2_by_pi_bits table.
;
; In:    xmm0 = x, interpreted as an IEEE double (low 64 bits are used).
;        The exponent extraction below does not mask the sign bit, so the
;        sign bit of x must be clear.
; Out:   xmm0 = r      = (signed fractional part of x * 2/pi) * pi/2
;        rax  = region = (integer part of x * 2/pi, rounded to nearest) & 3
; Writes: rax, rcx, rdx, r8, r9, r10, r11, xmm0, xmm1, flags.
;        xmm1 holds a copy of region on return.
;
; NOTE(review): xexp is shifted with a logical shr, so a negative exponent
; (any x < 1.0, which includes part of the documented x >= pi/4 range) would
; turn into a huge table offset. Confirm that callers only pass x >= 1.0.
; ---------------------------------------------------------------------------
fname PROC FRAME
; stack_size is 0 in this file, so no locals are requested from the macro.
StackAllocate stack_size
.ENDPROLOG
; r9 = base address of the external byte table
lea r9,__L_2_by_pi_bits
;get the unbiased exponent and the mantissa part of x
;xexp = (x >> 52) - 1023  (valid only while the sign bit of x is 0)
movd r11,xmm0
mov rcx,r11
shr r11,52
sub r11,1023 ; r11 <-- xexp = exponent of input x
;calculate the last byte from which to start multiplication
;last = 134 - (xexp >> 3)
;i.e. the table window moves by one byte for every 8 steps of the exponent;
;the remaining 3 exponent bits (resexp) are handled by bit shifts later.
mov r10,r11
shr r10,3
sub r10,134 ;r10 = -last
neg r10 ;r10 = last
;load 64 bits of 2_by_pi
;these 8 bytes (offset last) are the least significant word of the 192-bit
;multiplicand: they are multiplied first and only their carry moves upward
mov rax,[r9 + r10]
;mantissa of x = ((x << 12) >> 12) | implied bit
shl rcx,12
shr rcx,12 ;rcx = mantissa part of input x
bts rcx,52 ;add the implied bit as well
;load next 128 bits of 2_by_pi
;low qword of xmm0 = middle word (offset last+8),
;high qword of xmm0 = most significant word (offset last+16)
add r10,8 ;increment to next 8 bytes of 2_by_pi
movdqu xmm0,[r9 + r10]
;do three 64-bit multiplications with mant of x
;only the low 192 bits of the product are kept, in r10:r9:r8
mul rcx
mov r8,rax ;r8 = least significant 64 bits of mul = res1[2]
mov r10,rdx ;r10 <-- carry
movd rax,xmm0
mul rcx
;resexp = xexp & 7
and r11,7 ;r11 = resexp = xexp & 7 = last 3 bits
psrldq xmm0,8
add rax,r10 ; add the previous carry
adc rdx,0
mov r9,rax ;r9 = next 64 bits of mul = res1[1]
mov r10,rdx ;r10 <-- carry
movd rax,xmm0
mul rcx
;the high half (rdx) of this last product and any carry out of the add
;below are discarded
add r10,rax ;r10 = most sig 64 bits = res1[0]
;find the region
;The top (resexp + 10) bits of r10 are the integer part of the product,
;everything below bit (54 - resexp) is fraction.
;  rax        = r10 >> (54 - resexp)   = integer part
;  point_five = bit (53 - resexp) of r10 = last bit shifted out = CF
;  region     = (rax + point_five) & 3
; decimal point in last 18 bits ==> 8 lsb's in first 64 bits and
; 8 msb's in next 64 bits
mov rcx,54
mov rax,r10
sub rcx,r11
xor rdx,rdx ;rdx = sign bit of the result; stays 0 unless negated below
shr rax,cl
jnc L__no_point_five
;;if there is carry.. then negate the result of multiplication
;;fraction >= 0.5: round the region up and return the negative remainder.
;;The one's complement is used as the negation, i.e. it is off by one unit
;;in the last of the 192 bits.
;;not and mov leave the flags alone, so CF from the shr above is still set
;;when the adc below executes.
not r10
not r9
not r8
mov rdx,08000000000000000h
; NOTE(review): the fall-through path executes the ALIGN padding; this relies
; on the assembler emitting flag-preserving no-ops there -- confirm.
ALIGN 16
L__no_point_five:
adc rax,0
and rax,3
; Until / unless we find a better place to save it, we're putting
; the region in xmm1.
movd xmm1, rax
;calculate the number of integer bits and zero them out
mov rcx,r11
add rcx,10 ;rcx = no. of integer bits
shl r10,cl
shr r10,cl ;r10 contains only mant bits
sub rcx,64 ;form the exponent
mov r11,rcx ;r11 = resexp - 54 = exponent of bit 0 of r10
;find the highest set bit
;bsr sets ZF when its source is zero, so jnz is taken when r10 != 0
bsr rcx,r10
jnz L__form_mantissa
;top word has no fraction bits set: move the window down by one 64-bit word
;NOTE(review): if r9 is zero as well, the second bsr leaves rcx undefined.
mov r10,r9
mov r9,r8
bsr rcx,r10 ;rcx = hsb
sub r11,64
ALIGN 16
L__form_mantissa:
add r11,rcx ;r11 = unbiased exponent of the fraction
sub rcx,52 ;rcx = no. of bits to shift in r10
cmp rcx,0
jl L__hsb_below_52
je L__form_numbers
;hsb above 52
;shift right so the highest set bit lands on bit 52; low bits are dropped
mov r8,r10 ;previous contents of r8 not required
shr r10,cl ;r10 = mantissa of x with hsb at 52
jmp L__form_numbers
ALIGN 16
L__hsb_below_52:
;n = 52 - hsb; r10 = (r10 << n) | (r9 >> (64 - n)), so the vacated low bits
;are filled from the next lower word
neg rcx
mov rax,r9
shl r10,cl
shl r9,cl
sub rcx,64
neg rcx
shr rax,cl
or r10,rax
ALIGN 16
L__form_numbers:
;assemble the double: sign (rdx) | biased exponent << 52 | 52 mantissa bits
add r11,1023
btr r10,52 ;remove the implied bit
mov rcx,r11
or r10,rdx ;put the sign
shl rcx,52
or r10,rcx ;r10 = bit pattern of the signed fraction of x * 2/pi
movd xmm0,r10 ; xmm0 = signed fraction of x * 2/pi
movd rax, xmm1 ; rax <-- region
; At this point xmm0 has a double precision version of the fractional part
; of x * 2/pi. To get the reduced argument r, we multiply that by pi/2.
mulsd xmm0,L__piby2
StackDeallocate stack_size
ret
fname endp
END