-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathlzsa.s
343 lines (304 loc) · 10.2 KB
/
lzsa.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
; Sample reference depack code for lzsa: https://github.com/emmanuel-marty/lzsa
; Currently only supports the standard stream data, using blocks and forwards-decompression.
; Emphasis is on correctness rather than speed/size.
;------------------------------------------------------------------------------
; Depack an lzsa stream containing 1 or more lzsa-1 blocks.
; No bounds checking is performed.
; input a0 - start of compressed frame
; input a2 - start of output buffer
; return d0 - 0 for successful output, -1 for bad format.
lzsa_depack_stream:
cmp.b #$7b,(a0)+
bne.s .bad_format
cmp.b #$9e,(a0)+
bne.s .bad_format
move.l a7,a5
subq.l #4,a7
;read encoding type
;* V: 3 bit code that indicates which block data encoding is used. 0 is LZSA1 and 2 is LZSA2.
lea _decode_block_lzsa1(pc),a1 ; a1 = block depacker v1
move.b (a0)+,d0 ; get encoding type ($40 == lzsa2)
cmp.b #%00100000,d0
bne.s .block_loop
lea _decode_block_lzsa2(pc),a1 ; a1 = block depacker v2
.block_loop:
;# Frame format
;Each frame contains a 3-bytes length followed by block data that expands to up to 64 Kb of decompressed data.
;The block data is encoded either as LZSA1 or LZSA2 depending on the V bits of the traits byte in the header.
; 0 1 2
; DSZ0 DSZ1 U|DSZ2
move.b (a0)+,-(a5)
move.b (a0)+,-(a5)
move.b (a0)+,-(a5) ;could test here for the copy block flag
clr.b -(a5)
move.l (a5)+,d0
beq.s .all_done ;zero size == end of data
; Check compression flag. If set, copy raw data.
btst #23,d0
bne.s .uncompressed_block
; Assume the other bits are unset, so no need to mask now.
lea (a0,d0.w),a4 ; a4 = end of block
jsr (a1) ; run block depacker
bra.s .block_loop
.all_done:
addq.l #4,a7
moveq #0,d0 ; return success
rts
.uncompressed_block:
and.l #$1ffff,d0 ; mask size
.copy_block_loop:
move.b (a0)+,(a2)+
subq.l #1,d0
bne.s .copy_block_loop
bra.s .block_loop
.bad_format:
moveq #-1,d0 ; return failure
rts
;------------------------------------------------------------------------------
; Depack a single forward-compressed LZSA v1 block.
; a0 = block data
; a2 = output
; a4 = block end
_decode_block_lzsa1:
moveq #0,d0 ; ensure top bits are clear
; ============ TOKEN ==============
.loop:
; order of data:
; * token: <O|LLL|MMMM>
; * optional extra literal length
; * literal values
; * match offset low
; * optional match offset high
; * optional extra encoded match length
move.b (a0)+,d0 ; d0 = token byte
move.w d0,d1
and.w #%01110000,d1 ; d1 = literal length * 16, 0x0-0x70
beq.s .no_literals
lsr.w #4,d1 ; d1 = literal length, 0x0-0x7
cmp.b #7,d1
bne.s .copy_literals
; ============ EXTRA LITERAL LENGTH ==============
add.b (a0)+,d1 ; (we know the original is 7)
bcc.s .copy_literals ; 0-248, no carry is set, result 0-255
beq.s .copy249
; carry and not-equal means > 250
; 250: a second byte follows. The final literals value is 256 + the second byte.
move.b (a0)+,d1 ; higher byte is 0 from d0
add.w #256,d1
bra.s .copy_literals
.copy249:
; 249: a second and third byte follow, forming a little-endian 16-bit value.
; (note: value is unsigned!)
; Use 2 bytes as the offset, low-byte first
move.b (a0)+,-(a5)
move.b (a0)+,-(a5)
move.w (a5)+,d1
; ============ LITERAL VALUES ==============
.copy_literals:
subq.w #1,d1
.copy_loop:
move.b (a0)+,(a2)+
dbf.s d1,.copy_loop
.no_literals:
cmp.l a0,a4 ; end of block?
bne.s .get_match_offset
rts
; ============ MATCH OFFSET LOW ==============
.get_match_offset:
moveq #-1,d2 ; make it work for offsets bigger than 32K
btst #7,d0 ; two-bytes match offset?
beq.s .small_offset
; Use 2 bytes as the offset, low-byte first
move.b (a0)+,-(a5)
move.b (a0)+,-(a5)
move.w (a5)+,d2
bra.s .match_offset_done
.small_offset:
move.b (a0)+,d2 ; d2 = match offset pt 1
.match_offset_done:
; ============ MATCH LENGTH EXTRA ==============
; Match Length
move.w d0,d1
and.w #%00001111,d1 ; d1 = match length
addq.w #3,d1 ; d1 = match length +3 (3..18)
cmp.w #18,d1
bne.s .match_length_done
; d1.w = 15 here
add.b (a0)+,d1 ; get next size marker
bcc.s .match_length_done ; * 0-237: the value is added to the 15 stored in the token.
beq.s .match_length_238 ; * 238: a second and third byte follow
; 239: a second byte follows. The final match length is 256 + the second byte.
move.b (a0)+,d1
add.w #256,d1
bra.s .match_length_done
.match_length_238:
; 238 a second and third byte follow, forming a little-endian 16-bit value. The final encoded match length is that 16-bit value.
move.b (a0)+,-(a5)
move.b (a0)+,-(a5)
move.w (a5)+,d1
.match_length_done:
.copy_match:
subq.w #1,d1 ; -1 for dbf
lea (a2,d2.l),a3 ; a3 = match source (d2.w already negative)
.copy_match_loop:
move.b (a3)+,(a2)+
dbf d1,.copy_match_loop
bra.s .loop
.all_done:
rts
;------------------------------------------------------------------------------
; Depack a single forward-compressed LZSA v2 block.
; a0 = block data
; a2 = output
; a4 = block end
_decode_block_lzsa2:
moveq #-1,d4 ; d4 = last match offset
moveq #-1,d3 ; d3 = nybble flag (-1 == read again)
moveq #0,d0 ; ensure top bits are clear
; ============ TOKEN ==============
.loop:
; order of data:
;* token: <XYZ|LL|MMM>
;* optional extra literal length
;* literal values
;* match offset
;* optional extra encoded match length
;7 6 5 4 3 2 1 0
;X Y Z L L M M M
move.b (a0)+,d0 ; d0 = token byte
move.w d0,d1
; Length is built in d1
and.w #%011000,d1 ; d1 = literal length * 8
beq.s .no_literals
lsr.w #3,d1 ; d1 = literal length, 0x0-0x3
cmp.b #3,d1 ; special literal length?
bne.s .copy_literals
; ============ EXTRA LITERAL LENGTH ==============
bsr .read_nybble
add.b d2,d1 ; generate literal length
cmp.b #15+3,d1
;0-14: the value is added to the 3 stored in the token, to compose the final literals length.
bne.s .copy_literals
; Ignore d2. Extra byte follows.
move.b (a0)+,d1 ; read new length
; assume: * 0-237: 18 is added to the value (3 from the token + 15 from the nibble), to compose the final literals length.
add.b #18,d1 ; assume 0-237
bcc.s .copy_literals
;* 239: a second and third byte follow, forming a little-endian 16-bit value.
move.b (a0)+,-(a5)
move.b (a0)+,-(a5)
move.w (a5)+,d1
; ============ LITERAL VALUES ==============
.copy_literals:
subq.w #1,d1
.copy_loop:
move.b (a0)+,(a2)+
dbf.s d1,.copy_loop
.no_literals:
cmp.l a0,a4 ; end of block?
bne.s .get_match_offset
rts
; ============ MATCH OFFSET ==============
.get_match_offset:
;The match offset is decoded according to the XYZ bits in the token
;After all this, d0 is shifted up by 3 bits
move.w d0,d1
moveq #-1,d2 ; offset is "naturally" negative
add.b d1,d1 ; read top bit
bcs.s .matchbits_1
.matchbits_0:
; top bit 0
add.b d1,d1 ; read top bit
bcs.s .matchbits_01
;00Z 5-bit offset: read a nibble for offset bits 1-4 and use the inverted bit Z of the token as bit 0 of the offset. set bits 5-15 of the offset to 1.
bsr .read_nybble ;d2 = nybble
eor.b #$80,d1 ;read reverse of "Z" bit into carry
add.b d1,d1 ;reversed bit put in X flag
addx.b d2,d2 ;shift up and combine carry
or.b #%11100000,d2 ;ensure top bits are set again
bra.s .match_offset_done
.matchbits_01:
;01Z 9-bit offset: read a byte for offset bits 0-7 and use the inverted bit Z for bit 8 of the offset.
;set bits 9-15 of the offset to 1.
add.w d1,d1 ;read reverse of "Z" bit into carry
clr.b d1
eor.w d1,d2 ;flip bit 8 if needed
move.b (a0)+,d2 ;offset bits 0-7
bra.s .match_offset_done
.matchbits_1:
add.b d1,d1 ; read top bit
bcs.s .matchbits_11
;10Z 13-bit offset: read a nibble for offset bits 9-12 and use the inverted bit Z for bit 8 of the offset,
;then read a byte for offset bits 0-7. set bits 13-15 of the offset to 1.
bsr.s .read_nybble
eor.b #$80,d1 ;read reverse of "Z" bit into carry
add.b d1,d1 ;reversed bit put in X flag
addx.b d2,d2 ;shift up and combine carry
or.b #%11100000,d2 ;ensure top bits are set again
lsl.w #8,d2 ;move [0:4] up to [12:8]
move.b (a0)+,d2 ;read bits 0-7
sub.w #$200,d2 ;undocumented offset -- add 512 byte offset
bra.s .match_offset_done
.matchbits_11:
add.b d1,d1 ; read top bit
bcs.s .matchbits_111
;110 16-bit offset: read a byte for offset bits 8-15, then another byte for offset bits 0-7.
;CAUTION This is big-endian!
move.b (a0)+,d2 ; low part
lsl.w #8,d2
move.b (a0)+,d2 ; high part
bra.s .match_offset_done
.matchbits_111:
;111 repeat offset: reuse the offset value of the previous match command.
move.l d4,d2
.match_offset_done:
move.l d2,d4 ; d4 = previous match
lea (a2,d2.l),a3 ; a3 = match source (d2.w already negative)
; ============ MATCH LENGTH EXTRA ==============
; Match Length
move.w d0,d1 ; clear top bits of length
and.w #%00000111,d1 ; d1 = match length 0-7
addq.w #2,d1 ; d1 = match length 2-9
cmp.w #2+7,d1
bne.s .match_length_done
; read nybble and add
bsr .read_nybble
;* 0-14: the value is added to the 7 stored in the token, and then the minmatch
; of 2 is added, to compose the final match length.
add.b d2,d1
cmp.b #2+7+15,d1
bne.s .match_length_done
;* 15: an extra byte follows
;If an extra byte follows here, it can have two possible types of value:
;* 0-231: 24 is added to the value (7 from the token + 15 from the nibble + minmatch of 2),
;to compose the final match length.
add.b (a0)+,d1
bcc.s .match_length_done
;* 233: a second and third byte follow, forming a little-endian 16-bit value.
;*The final encoded match length is that 16-bit value.
move.b (a0)+,-(a5) ; low part
move.b (a0)+,-(a5) ; high part
move.w (a5)+,d1
.match_length_done:
.copy_match:
subq.w #1,d1 ; -1 for dbf
; " the encoded match length is the actual match length offset by the minimum, which is 3 bytes"
.copy_match_loop:
move.b (a3)+,(a2)+
dbf d1,.copy_match_loop
bra .loop
; returns next nibble in d2
; nybble status in d3; top bit set means "read next byte"
.read_nybble:
tst.b d3 ; anything in the buffer?
bmi.s .next_byte
move.b d3,d2 ; copy buffer contents
moveq #-1,d3 ; flag buffer is empty
rts
.next_byte:
; buffer is empty, so prime next
move.b (a0)+,d3 ; fetch
move.b d3,d2
lsr.b #4,d2 ; d1 = top 4 bits shifted down (result)
and.b #$f,d3 ; d3 = remaining bottom 4 bits, with "empty" flag cleared
rts