-
Notifications
You must be signed in to change notification settings - Fork 1
/
common_macros.h
169 lines (144 loc) · 6.41 KB
/
common_macros.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
/*
VFP math library for the iPhone / iPod touch
Copyright (c) 2007-2008 Wolfgang Engel and Matthias Grundmann
http://code.google.com/p/vfpmathlibrary/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising
from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product documentation
would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must
not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef COMMON_MACROS_H__
#define COMMON_MACROS_H__
// Usage for any VFP routine is:
/*
asm volatile (VFP_SWITCH_TO_ARM
<-- your code goes here
VFP_SWITCH_TO_THUMB
: <list of output operands, i.e. registers your code modifies>
: <list of input operands>
: "r0", <additional registers you used>
);
*/
// NOTE: Lots of VFP macros overwrite register r0, therefore you have to make sure
// to include "r0" in the list of registers used, as in above example.
// Compile-time diagnostic stating which variant of the mode-switch macros is in effect.
// Defining NO_THUMB selects the ARM variant; leaving it undefined selects the Thumb variant.
#ifdef NO_THUMB
#warning "Compiling in ARM mode. Mode switches deactivated."
#else
#warning "Compiling in Thumb Mode. Mode switches activated."
#endif
// Switches from THUMB to ARM mode.
// Expands to an assembler string that must be placed at the start of an asm template.
// When NO_THUMB is defined the expansion is empty; otherwise the sequence overwrites r0.
#ifdef NO_THUMB
#define VFP_SWITCH_TO_ARM
#else
#define VFP_SWITCH_TO_ARM \
    ".align 4 \n\t"     /* alignment directive ahead of the switch sequence */ \
    "mov r0, pc \n\t"   /* r0 <- pc */ \
    "bx r0 \n\t"        /* branch-and-exchange to the address held in r0 */ \
    ".arm \n\t"         /* assemble everything that follows as ARM code */
#endif
// Switches from ARM to THUMB mode.
// Expands to an assembler string that must be placed at the end of an asm template.
// When NO_THUMB is defined the expansion is empty; otherwise the sequence overwrites r0.
#ifdef NO_THUMB
#define VFP_SWITCH_TO_THUMB
#else
#define VFP_SWITCH_TO_THUMB \
    "add r0, pc, #1 \n\t"   /* r0 <- pc + 1, i.e. target address with bit 0 set */ \
    "bx r0 \n\t"            /* branch-and-exchange to the address held in r0 */ \
    ".thumb \n\t"           /* assemble everything that follows as Thumb code */
#endif
// NOTE: Both VFP_VECTOR_LENGTH* macros will stall the FP unit,
// until all currently processed operations have been executed.
// Call wisely.
// FP operations (except load/stores) will interpret a command like
// fadds s8, s16, s24
// AS
// fadds {s8-s11}, {s16-s19}, {s24-s27} in case the vector length is set to 4
// (VEC_LENGTH == 3). With the length field set to zero they act on single registers.
// Sets length and stride to 0.
// Read-modify-write of FPSCR through r0, so r0 is overwritten.
#define VFP_VECTOR_LENGTH_ZERO \
    "fmrx r0, fpscr \n\t"             /* r0 <- FPSCR */ \
    "bic r0, r0, #0x00370000 \n\t"    /* clear bits 16-18 and 20-21 (length and stride) */ \
    "fmxr fpscr, r0 \n\t"             /* FPSCR <- r0 */
// Set vector length. VEC_LENGTH has to be between 0 (for length 1) and 3 (for length 4).
// VEC_LENGTH must expand to a single hexadecimal digit; it is pasted into the
// immediate 0x000<VEC_LENGTH>0000 that is OR-ed into FPSCR after the length and
// stride bits have been cleared. r0 is overwritten.
//
// The argument is stringified through two macro levels so that a macro-named length
// (e.g. #define MY_LEN 3 ... VFP_VECTOR_LENGTH(MY_LEN)) is expanded to its value
// first instead of being pasted as the literal text "MY_LEN".
#define VFP_STRINGIFY_RAW_(X) #X
#define VFP_STRINGIFY_(X) VFP_STRINGIFY_RAW_(X)
#define VFP_VECTOR_LENGTH(VEC_LENGTH) \
    "fmrx r0, fpscr \n\t"                                       /* r0 <- FPSCR */ \
    "bic r0, r0, #0x00370000 \n\t"                              /* clear length and stride */ \
    "orr r0, r0, #0x000" VFP_STRINGIFY_(VEC_LENGTH) "0000 \n\t" /* insert new length */ \
    "fmxr fpscr, r0 \n\t"                                       /* FPSCR <- r0 */
// Fixed vector operation for vectors of length 1, i.e. scalars.
// Expects pointers to source and destination data; computes
//   P_DST[0] = P_SRC_1[0] <VFP_OP> P_SRC_2[0]
// VFP_OP must be a string literal holding an FP opcode that takes three register
// operands (destination, source, source), e.g. VFP_OP_ADD.
// Registers s8 and s16 are used as scratch and r0 by the mode switches; all are
// declared as clobbers. "memory" is declared as well because the result is stored
// through P_DST, which the compiler only sees as an input operand.
// The expansion already ends in ';', so the macro can be used without one.
#define VFP_FIXED_1_VECTOR_OP(VFP_OP, P_SRC_1, P_SRC_2, P_DST) \
asm volatile (VFP_SWITCH_TO_ARM \
"fldmias %1, {s8} \n\t" \
"fldmias %2, {s16} \n\t" \
VFP_OP " s8, s8, s16 \n\t" \
"fstmias %0, {s8} \n\t" \
VFP_SWITCH_TO_THUMB \
: \
: "r" (P_DST), "r" (P_SRC_1), "r" (P_SRC_2) \
: "r0", "memory", "s8", "s16" \
);
// Fixed vector operation for vectors of length 2.
// Expects pointers to source and destination data; computes, for i = 0..1,
//   P_DST[i] = P_SRC_1[i] <VFP_OP> P_SRC_2[i]
// VFP_OP must be a string literal holding an FP opcode that takes three register
// operands (destination, source, source), e.g. VFP_OP_ADD.
// Registers s8-s9 and s16-s17 are used as scratch and r0 by the mode switches; all
// are declared as clobbers. "memory" is declared as well because the result is
// stored through P_DST, which the compiler only sees as an input operand.
// The expansion already ends in ';', so the macro can be used without one.
#define VFP_FIXED_2_VECTOR_OP(VFP_OP, P_SRC_1, P_SRC_2, P_DST) \
asm volatile (VFP_SWITCH_TO_ARM \
"fldmias %1, {s8-s9} \n\t" \
"fldmias %2, {s16-s17} \n\t" \
VFP_OP " s8, s8, s16 \n\t" \
VFP_OP " s9, s9, s17 \n\t" \
"fstmias %0, {s8-s9} \n\t" \
VFP_SWITCH_TO_THUMB \
: \
: "r" (P_DST), "r" (P_SRC_1), "r" (P_SRC_2) \
: "r0", "memory", "s8", "s9", "s16", "s17" \
);
// Fixed vector operation for vectors of length 3.
// Expects pointers to source and destination data; computes, for i = 0..2,
//   P_DST[i] = P_SRC_1[i] <VFP_OP> P_SRC_2[i]
// VFP_OP must be a string literal holding an FP opcode that takes three register
// operands (destination, source, source), e.g. VFP_OP_ADD.
// Registers s8-s10 and s16-s18 are used as scratch and r0 by the mode switches; all
// are declared as clobbers. "memory" is declared as well because the result is
// stored through P_DST, which the compiler only sees as an input operand.
// The expansion already ends in ';', so the macro can be used without one.
#define VFP_FIXED_3_VECTOR_OP(VFP_OP, P_SRC_1, P_SRC_2, P_DST) \
asm volatile (VFP_SWITCH_TO_ARM \
"fldmias %1, {s8-s10} \n\t" \
"fldmias %2, {s16-s18} \n\t" \
VFP_OP " s8, s8, s16 \n\t" \
VFP_OP " s9, s9, s17 \n\t" \
VFP_OP " s10, s10, s18 \n\t" \
"fstmias %0, {s8-s10} \n\t" \
VFP_SWITCH_TO_THUMB \
: \
: "r" (P_DST), "r" (P_SRC_1), "r" (P_SRC_2) \
: "r0", "memory", "s8", "s9", "s10", "s16", "s17", "s18" \
);
// Fixed vector operation for vectors of length 4.
// Expects pointers to source and destination data; computes, for i = 0..3,
//   P_DST[i] = P_SRC_1[i] <VFP_OP> P_SRC_2[i]
// VFP_OP must be a string literal holding an FP opcode that takes three register
// operands (destination, source, source), e.g. VFP_OP_ADD.
// Registers s8-s11 and s16-s19 are used as scratch and r0 by the mode switches; all
// are declared as clobbers. "memory" is declared as well because the result is
// stored through P_DST, which the compiler only sees as an input operand.
// The expansion already ends in ';', so the macro can be used without one.
#define VFP_FIXED_4_VECTOR_OP(VFP_OP, P_SRC_1, P_SRC_2, P_DST) \
asm volatile (VFP_SWITCH_TO_ARM \
"fldmias %1, {s8-s11} \n\t" \
"fldmias %2, {s16-s19} \n\t" \
VFP_OP " s8, s8, s16 \n\t" \
VFP_OP " s9, s9, s17 \n\t" \
VFP_OP " s10, s10, s18 \n\t" \
VFP_OP " s11, s11, s19 \n\t" \
"fstmias %0, {s8-s11} \n\t" \
VFP_SWITCH_TO_THUMB \
: \
: "r" (P_DST), "r" (P_SRC_1), "r" (P_SRC_2) \
: "r0", "memory", "s8", "s9", "s10", "s11", "s16", "s17", "s18", "s19" \
);
// NOTE: Usage example for VFP_FIXED_?_VECTOR_OP
// float* src_ptr_1;
// float* src_ptr_2;
// float* dst_ptr;
// VFP_FIXED_4_VECTOR_OP(VFP_OP_ADD, src_ptr_1, src_ptr_2, dst_ptr)
// Opcode mnemonics (single precision) to pass as the VFP_OP argument of the
// VFP_FIXED_?_VECTOR_OP macros. Each one is a plain string literal that gets
// concatenated into the asm template.
// NOTE(review): the VFP_FIXED_?_VECTOR_OP templates emit three register operands
// per instruction; fabss and fsqrts appear to be two-operand instructions, so
// VFP_OP_ABS / VFP_OP_SQRT presumably do not fit those templates - confirm.
#define VFP_OP_ADD  "fadds"    /* addition */
#define VFP_OP_SUB  "fsubs"    /* subtraction */
#define VFP_OP_MUL  "fmuls"    /* multiplication */
#define VFP_OP_DIV  "fdivs"    /* division */
#define VFP_OP_ABS  "fabss"    /* absolute value */
#define VFP_OP_SQRT "fsqrts"   /* square root */
#endif // COMMON_MACROS_H__