-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcore_matrix.f
295 lines (277 loc) · 7.95 KB
/
core_matrix.f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
\ Pass every element of a matrix through s16>n, in place: N &A --
\ Each of the N*N consecutive cells starting at a-addr is fetched, handed to
\ s16>n (defined elsewhere in the project) and the result is stored back to
\ the same cell.
\   u      - matrix dimension N
\   a-addr - address of the first element
\ An empty matrix (u = 0) is a no-op: ?DO skips the loop, whereas DO with
\ equal limit and index would keep running until the index wraps around.
: matrix_s16>n ( u a-addr -- )
  swap dup *            \ &A N*N
  0 ?do                 \ &A
    dup @ s16>n         \ &A n
    over !              \ &A
    cell+               \ next element
  loop
  drop ;
\ Initialize the memory block for matrix benchmarking: N &memblk seed --
\ Parameters:
\ u1 (N) - Matrix dimension. It is stored unchanged in the first cell of
\          memblk; unlike the C prototype below, no block size is converted
\          into a dimension here.
\ a-addr (memblk) - Pointer to memory block.
\ u2 (seed) - Actual values chosen depend on the seed parameter; a seed of 0
\             is replaced by 1.
\ Layout written at memblk:
\ | N | &A | &B | &C | A11 ... | B11 ... | C ...
\ with &A = memblk + 4 cells, &B = &A + N*N cells, &C = &B + N*N cells.
\ This four-cell header is what p->n, p->a, p->b and p->c read.
\ Returns:
\ Nothing on the data stack.
\ ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p)
: core_init_matrix ( u1 a-addr u2 -- )
\ A zero seed would make every generated value zero, so use 1 instead.
dup 0= if drop 1 then
>r tuck ( &memblk N &memblk R: seed )
2dup ! \ | N | ...
cell+ dup 3 cells + swap ( &memblk N &A a-addr ) \ a-addr = header slot for &A
2dup ! \ | N | &A | ...
cell+ >r \ R: seed slot-for-&B
over dup * cells ( &memblk N &A size ) \ size = N*N cells
2dup + ( &memblk N &A size &B )
swap over ( &memblk N &A &B size &B )
dup r@ ! \ | N | &A | &B | ...
+ r> cell+ ! \ | N | &A | &B | &C | A11 ...
\ Loop index i runs from 1 to N*N and serves as the "order" counter.
rot dup * 1+ r> swap ( &memblk &A &B seed N*N+1 )
1 do ( &A &B seed )
\ seed = (seed * i) mod 65536
i * $FFFF and
\ val = (seed + i) mod 65536, stored into the next element of B
dup i + $FFFF and
rot 2dup ! cell+ ( &A seed val &B )
\ val = (val + i) mod 256, stored into the next element of A
i swap >r + $00FF and
rot tuck ! cell+
swap r> swap ( &A &B seed )
loop
2drop drop ( &memblk )
dup @ swap cell+ ( N a-addr ) \ a-addr = header slot holding &A
\ Run matrix_s16>n over B first, then over A.
2dup cell+ @
matrix_s16>n
@ matrix_s16>n ;
\ Print the u x u matrix of single-cell elements stored at a-addr.
\ Elements are printed in memory order with s16. (defined elsewhere);
\ a CR is emitted after every u elements, i.e. at the end of each row.
: s16.matrix ( u a-addr -- )
  swap dup              \ &M u u
  0 do                  \ &M u       one pass per row
    dup 0 do            \ &M u       one pass per element
      over @ s16.
      swap cell+ swap   \ advance &M to the next element
    loop
    cr
  loop
  2drop ;
\ Print the u x u matrix of double-cell elements stored at a-addr.
\ Each element is fetched with 2@ and printed with s32. (defined elsewhere);
\ a CR is emitted after every u elements, i.e. at the end of each row.
: s32.matrix ( u a-addr -- )
  swap dup              \ &M u u
  0 do                  \ &M u       one pass per row
    dup 0 do            \ &M u       one pass per element
      over 2@ s32.
      swap 2 cells + swap   \ advance &M by one double-cell element
    loop
    cr
  loop
  2drop ;
\ Calculate a function that depends on the values of elements in the matrix: &C clipval N -- ret
\ For each element, accumulate into a temporary variable.
\ As long as this value is not above the parameter clipval,
\ add 1 to the result if the element is bigger than the previous one.
\ Otherwise, reset the accumulator and add 10 to the result.
\ Parameters:
\ a-addr (&C) - N*N double-cell elements, read with 2@.
\ n1 (clipval) - single-cell clip value; it is widened to a double by pushing
\                -1 as the high cell, which is only correct for negative values.
\ u (N) - matrix dimension.
\ Returns:
\ n2 - the accumulated result.
\ ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval)
: matrix_sum ( a-addr n1 u -- n2 )
\ Reorder the inputs and seed the running state with zeros.
>r >r >r
0 0 0 0 r> r> 0 r> \ dprev dtmp &C clipval ret N
dup *
0 do \ dprev dtmp &C clipval ret
>r >r dup >r \ dprev dtmp &C R: ret clipval &C
2@ d+ 2dup \ dprev dtmp dtmp R: ret clipval &C
\ Fetch clipval from under &C on the return stack and sign-extend it.
r> r@ swap >r #-1 \ clipval is always negative
d> if \ dprev dtmp
\ dtmp exceeded clipval: dprev = current element, dtmp = 0, increment = 10.
2drop 2drop
r@ 2@ 0 0
#10
else
\ Compare the current element against dprev.
2swap r@ 2@ 2swap \ dtmp dcur dprev
d> if
r@ 2@ 2swap #1 \ dcur dtmp 1 : element grew, increment = 1
else
r@ 2@ 2swap 0 \ dcur dtmp 0 : no increment
then
then
\ Here: dprev dtmp increment R: ret clipval &C
r> cell+ cell+ \ advance &C by one double-cell element
r> rot r> + \ dprev dtmp &C clipval ret+increment
loop
>r 2drop 2drop 2drop r> ;
\ Multiply a matrix by a constant: N &C &A val --
\ This could be used as a scaler for instance.
\   u (N)        - matrix dimension; N*N elements are processed
\   a-addr1 (&C) - destination, one double-cell product per element (2!)
\   a-addr2 (&A) - source, one cell per element; left unchanged
\   n (val)      - signed multiplier
\ N = 0 is a no-op: ?DO skips the loop, whereas DO with equal limit and
\ index would keep running until the index wraps around.
\ void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val)
: matrix_mul_const ( u a-addr1 a-addr2 n -- )
  swap rot              \ N val &A &C
  >r rot r> swap        \ val &A &C N
  dup *                 \ val &A &C N*N
  0 ?do                 \ val &A &C
    >r                  \ val &A            R: &C
    2dup @ m*           \ val &A d          d = val * A[k]
    r@ 2!               \ val &A
    cell+               \ val &A'
    r> cell+ cell+      \ val &A' &C'       C elements are two cells wide
  loop
  2drop drop ;
\ Add a constant value to all elements of a matrix: N &A val --
\   u (N)       - matrix dimension; N*N consecutive cells are updated
\   a-addr (&A) - first element, one cell per element, modified in place
\   n (val)     - value added to every element
\ N = 0 is a no-op: ?DO skips the loop, whereas DO with equal limit and
\ index would keep running until the index wraps around.
\ void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val)
: matrix_add_const ( u a-addr n -- )
  swap                  \ N val &A
  rot dup *             \ val &A N*N
  0 ?do                 \ val &A
    2dup @ +            \ val &A sum
    over !              \ val &A
    cell+
  loop
  2drop ;
\ Multiply a matrix by a vector: N &C &A &B --
\ This is common in many simple filters (e.g. fir where a vector of coefficients is applied to the matrix.)
\   u (N)        - matrix dimension
\   a-addr1 (&C) - destination, N double-cell results (one per row of A)
\   a-addr2 (&A) - NxN matrix, one cell per element, walked row by row
\   a-addr3 (&B) - vector; only its first N cells are read
\ N = 0 is a no-op: ?DO skips the loops, whereas DO with equal limit and
\ index would keep running until the index wraps around.
\ void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
: matrix_mul_vect ( u a-addr1 a-addr2 a-addr3 -- )
  rot >r rot >r         \ &A &B                  R: &C N
  tuck                  \ &B0 &A &B              &B0 remembers the vector start
  r> r> swap            \ &B0 &A &B &C N
  dup
  0 ?do                 \ &B0 &A &B &C N
    dup >r swap >r      \ &B0 &A &B N            R: N &C
    0 0 rot             \ &B0 &A &B 0. N         double accumulator starts at 0
    0 ?do               \ &B0 &A &B acc
      >r >r             \ park the accumulator
      over cell+ over cell+   \ &A &B &A' &B'
      2swap @ swap @ m*       \ &A' &B' d        d = A[i][j] * B[j]
      r> r>
      d+
    loop
    r@ 2!               \ C[i] = acc
    drop over           \ reset &B to &B0; &A already points at the next row
    r> cell+ cell+ r>   \ &B0 &A &B &C' N
  loop
  2drop 2drop drop ;
\ Scalar product of A row by B column: &A &B N -- res
\   a-addr1 (&A) - first element of the row; advanced by one cell per step
\   a-addr2 (&B) - first element of the column; advanced by N cells per step
\   u (N)        - number of element pairs
\   d            - double-cell sum of the N signed products
\ N = 0 yields 0.: ?DO skips the loop, whereas DO with equal limit and
\ index would keep running until the index wraps around.
: scalar_product ( a-addr1 a-addr2 u -- d )
  >r >r >r
  0 0 r> r> r> dup      \ d &A &B N N
  0 ?do                 \ d &A &B N
    >r 2dup >r >r       \ d &A &B           R: N &B &A
    @ swap @ m* d+      \ d'
    r> cell+            \ d' &A'
    r> r@ cells +       \ d' &A' &B'        next row of the same column
    r>                  \ d' &A' &B' N
  loop
  2drop drop ;
\ Row by matrix multiplication: &A &B &C N --
\ Calls scalar_product N times, pairing the row at &A with N successive
\ columns of B, and stores each double-cell result at successive &C slots.
\   a-addr1 (&A) - first element of the row
\   a-addr2 (&B) - first element of the matrix; advanced one cell per column
\   a-addr3 (&C) - destination; advanced two cells per result
\   u (N)        - dimension
\ N = 0 is a no-op: ?DO skips the loop, whereas DO with equal limit and
\ index would keep running until the index wraps around.
: row_mul_matrix ( a-addr1 a-addr2 a-addr3 u -- )
  dup
  0 ?do                         \ &A &B &C N
    dup >r swap >r              \ &A &B N          R: N &C
    >r 2dup r> scalar_product   \ &A &B d
    r@ 2!                       \ &A &B
    cell+ r> cell+ cell+ r>     \ &A &B' &C' N
  loop
  2drop 2drop ;
\ Multiply a matrix by a matrix: &A &B &C N --
\ Basic code is used in many algorithms, mostly with minor changes such as scaling.
\   a-addr1 (&A) - left matrix, one cell per element
\   a-addr2 (&B) - right matrix, one cell per element
\   a-addr3 (&C) - destination, one double-cell result per element
\   u (N)        - dimension
\ The inner loop fills one row of C with scalar_product; the outer loop then
\ steps &A to the next row and rewinds &B to its first column.
\ N = 0 is a no-op: ?DO skips the loops, whereas DO with equal limit and
\ index would keep running until the index wraps around.
\ void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
: matrix_mul_matrix ( a-addr1 a-addr2 a-addr3 u -- )
  dup
  0 ?do                           \ &A &B &C N
    dup
    0 ?do                         \ &A &B &C N
      dup >r swap >r              \ &A &B N          R: N &C
      >r 2dup r> scalar_product   \ &A &B d
      r@ 2!                       \ &A &B
      cell+ r> cell+ cell+ r>     \ &A &B' &C' N
    loop
    tuck >r >r                    \ &A &B' N         R: N &C'
    cells tuck -                  \ &A rowsize &B    &B rewound by N cells
    >r + r> r> r>                 \ &A' &B &C' N     &A advanced by one row
  loop
  2drop 2drop ;
\ Extract a bit field: shift x1 right (logically) by u1 bits and keep the
\ low u2 bits of the result.
\   x1 - source value
\   u1 - shift count (position of the field's lowest bit)
\   u2 - field width in bits
: bit_extract ( x1 u1 u2 -- x2 )
  -1 swap lshift invert   \ x1 u1 mask      mask has the low u2 bits set
  >r rshift r> and ;
\ Bitextract scalar product of A row by B column: &A &B N -- res
\ For every element pair the signed product is formed, its high cell is
\ discarded, and the two fields "2 4 bit_extract" and "5 7 bit_extract" of
\ the low cell are multiplied together; those products are summed.
\   a-addr1 (&A) - first element of the row; advanced by one cell per step
\   a-addr2 (&B) - first element of the column; advanced by N cells per step
\   u (N)        - number of element pairs
\   d            - double-cell sum
\ N = 0 yields 0.: ?DO skips the loop, whereas DO with equal limit and
\ index would keep running until the index wraps around.
: scalar_product_bitextract ( a-addr1 a-addr2 u -- d )
  >r >r >r
  0 0 r> r> r> dup              \ d &A &B N N
  0 ?do                         \ d &A &B N
    >r 2dup >r >r               \ d &A &B          R: N &B &A
    @ swap @ m*                 \ d dtmp
    drop dup #2 #4 bit_extract  \ d tmp f1         low cell of the product only
    swap #5 #7 bit_extract      \ d f1 f2
    m* d+                       \ d'
    r> cell+                    \ d' &A'
    r> r@ cells +               \ d' &A' &B'       next row of the same column
    r>                          \ d' &A' &B' N
  loop
  2drop drop ;
\ Multiply a matrix by a matrix, and extract some bits from the result: &A &B &C N --
\ Basic code is used in many algorithms, mostly with minor changes such as scaling.
\   a-addr1 (&A) - left matrix, one cell per element
\   a-addr2 (&B) - right matrix, one cell per element
\   a-addr3 (&C) - destination, one double-cell result per element
\   u (N)        - dimension
\ Same traversal as matrix_mul_matrix, but each element of C comes from
\ scalar_product_bitextract.
\ N = 0 is a no-op: ?DO skips the loops, whereas DO with equal limit and
\ index would keep running until the index wraps around.
\ void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
: matrix_mul_matrix_bitextract ( a-addr1 a-addr2 a-addr3 u -- )
  dup
  0 ?do                           \ &A &B &C N
    dup
    0 ?do                         \ &A &B &C N
      dup >r swap >r              \ &A &B N          R: N &C
      >r 2dup r> scalar_product_bitextract   \ &A &B d
      r@ 2!                       \ &A &B
      cell+ r> cell+ cell+ r>     \ &A &B' &C' N
    loop
    tuck >r >r                    \ &A &B' N         R: N &C'
    cells tuck -                  \ &A rowsize &B    &B rewound by N cells
    >r + r> r> r>                 \ &A' &B &C' N     &A advanced by one row
  loop
  2drop 2drop ;
\ Set every bit of n1 from bit 12 upwards, leaving the low 12 bits as they are.
\ The result is therefore always negative.
: matrix_big ( n1 -- n2 )
  $0FFF invert or ;
\ Fetch the matrix dimension N: the first cell of the parameter block at a-addr.
: p->n ( a-addr -- u )
@ ;
\ Fetch the address of matrix A: the second cell of the parameter block.
: p->a ( a-addr1 -- a-addr2 )
  1 cells + @ ;
\ Fetch the address of matrix B: the third cell of the parameter block.
: p->b ( a-addr1 -- a-addr2 )
  2 cells + @ ;
\ Fetch the address of matrix C: the fourth cell of the parameter block.
: p->c ( a-addr1 -- a-addr2 )
  3 cells + @ ;
\ Perform matrix manipulation: &p val -- crc
\ Parameters:
\ a-addr (&p) - parameter block, read through p->n, p->a, p->b and p->c:
\ N - Dimensions of the matrix.
\ C - memory for result matrix.
\ A - input matrix
\ B - operator matrix (not changed during operations)
\ n (val) - constant; it is passed through s16>n before use.
\ Returns:
\ A CRC value that captures all results calculated in the function.
\ In particular, crc of the value calculated on the result matrix
\ after each step by <matrix_sum>.
\ Operation:
\ 1 - Add a constant value to all elements of matrix A.
\ 2 - Multiply matrix A by a constant into C; matrix_sum, crc16.
\ 3 - Multiply matrix A by vector B into C; matrix_sum, crc16.
\ 4 - Multiply matrix A by matrix B into C; matrix_sum, crc16.
\ 5 - Multiply matrix A by matrix B with bit extraction into C; matrix_sum, crc16.
\ 6 - Add the negated constant to all elements of matrix A.
\ After the last step, matrix A is back to original contents.
\ ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val)
: matrix_test ( a-addr n -- u )
s16>n tuck \ val &p val
\ A += val ( N &A val )
2dup >r dup p->n swap p->a r> matrix_add_const
\ C = A * val ( N &C &A val )
2dup >r dup p->n swap dup p->c swap p->a r> matrix_mul_const
\ clipval = matrix_big(val); it stays on the stack for every matrix_sum below
matrix_big \ val &p clipval
\ matrix_sum ( &C clipval N ) and start the crc with an initial value of 0
2dup over p->c swap rot p->n matrix_sum
0 crc16 >r \ R: crc
\ C = A * B as a vector ( N &C &A &B )
over dup p->n swap dup p->c swap dup p->a swap p->b matrix_mul_vect
2dup over p->c swap rot p->n matrix_sum
r> crc16 >r \ R: crc
\ C = A * B ( &A &B &C N )
over dup p->a swap dup p->b swap dup p->c swap p->n matrix_mul_matrix
2dup over p->c swap rot p->n matrix_sum
r> crc16 >r \ R: crc
\ C = A * B with bit extraction ( &A &B &C N )
over dup p->a swap dup p->b swap dup p->c swap p->n matrix_mul_matrix_bitextract
2dup over p->c swap rot p->n matrix_sum
r> crc16 >r \ R: crc
\ Drop clipval, then A += -val ( N &A -val ) to undo step 1
drop dup p->n swap p->a rot negate matrix_add_const
r> ;
\ Benchmark function: &p seed crc -- crc'
\ Runs <matrix_test> once on the parameter block at a-addr with n as its
\ constant, then hands the result together with the incoming crc u1 to
\ crc16 (defined elsewhere) and returns what crc16 leaves.
\ ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc)
: core_bench_matrix ( a-addr n u1 -- u2 )
  -rot matrix_test      \ crc result        crc waits underneath the call
  swap crc16 ;          \ crc16 receives ( result crc )