-
Notifications
You must be signed in to change notification settings - Fork 128
/
dx11shaderanalyse.py
executable file
·427 lines (358 loc) · 14.8 KB
/
dx11shaderanalyse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
#!/usr/bin/env python3
import sys, os, argparse
import struct, hashlib, codecs, zlib
from collections import namedtuple
import numpy as np
import math
import extract_unity_shaders
import io
# Signature "system value" codes -> short names shown in the verbose listing.
# Code 0 (NONE) is also what TARGET and the SPRs (COVERAGE, DEPTH, DEPTHGE,
# DEPTHLE, ...) report.
system_values = dict(enumerate((
    'NONE',      # 0
    'POS',       # 1
    'CLIPDST',   # 2
    'CULLDST',   # 3
    'RTINDEX',   # 4
    'VPINDEX',   # 5
    'VERTID',    # 6
    'PRIMID',    # 7
    'INSTID',    # 8
    'FFACE',     # 9
    'SAMPLE',    # 10
    'QUADEDGE',  # 11
    'QUADINT',   # 12
    'TRIEDGE',   # 13
    'TRIINT',    # 14
    'LINEDET',   # 15
    'LINEDEN',   # 16
)))

# Signature component type codes (1-based) -> type names.
types = dict(enumerate(('uint', 'int', 'float'), start=1))

# Current output level; parse_args() sets it to the number of --verbose flags
# minus the number of --quiet flags.
verbosity = 0
def lookup(id, dict):
    """Return dict[id], or the placeholder 'Unknown (<id>)' when id is absent.

    id:   key to look up.
    dict: mapping queried with .get().
    """
    placeholder = "Unknown ({})".format(id)
    return dict.get(id, placeholder)
def pr_verbose(*a, verbosity=1, **kw):
    """print(*a, **kw), but only if the global verbosity is high enough.

    verbosity: minimum module-level verbosity at which the message is shown.
    """
    # The keyword argument shadows the module-level setting of the same name,
    # so the global has to be fetched explicitly.
    current_level = globals()['verbosity']
    if current_level < verbosity:
        return
    print(*a, **kw)
def parse_dxbc_header(stream):
    """Read the 32-byte DXBC header from the current position of stream.

    Returns a DXBCHeader namedtuple (signature, hash, unknown1, size, chunks)
    unpacked little-endian as a 4-byte tag, a 16-byte hash and three 32-bit
    unsigned integers.

    Raises AssertionError if the tag is not b'DXBC' or unknown1 is not 1.
    """
    DXBCHeader = namedtuple('DXBCHeader',
                            'signature hash unknown1 size chunks')
    raw = stream.read(0x20)
    header = DXBCHeader._make(struct.unpack('<4s16s3I', raw))
    assert(header.signature == b'DXBC')
    assert(header.unknown1 == 1)
    return header
def get_chunk_offsets(stream, header):
    """Read header.chunks little-endian 32-bit offsets from stream.

    Reads from the current stream position and returns the offsets as a tuple.
    """
    count = header.chunks
    table = stream.read(4 * count)
    return struct.unpack('<{}I'.format(count), table)
def c_str(buf):
    """Decode the NUL-terminated ASCII string at the start of buf.

    buf: bytes-like object supporting .find() and slicing.

    Returns the text before the first NUL byte. If buf contains no NUL the
    whole buffer is decoded; previously the -1 returned by find() was used as
    the slice end and the final character was silently dropped.

    Raises UnicodeDecodeError if the string is not ASCII.
    """
    end = buf.find(b'\0')
    if end < 0:
        end = len(buf)
    return buf[:end].decode('ascii')
# def get_SysValue_name(val):
# return {
# 0: 'NONE',
# 1: 'POS'
# }.get(val, 'UNKNOWN')
def mask_components(mask):
    """Expand a 4-bit component mask into a list of four characters.

    Bits 0..3 select 'x', 'y', 'z' and 'w' respectively; a cleared bit leaves
    a space in that position.

    Raises AssertionError if any bit above the low four is set.
    """
    assert((mask & ~0xf) == 0)
    return [name if mask & (1 << bit) else ' '
            for bit, name in enumerate('xyzw')]
def mask_str(mask):
    """Return the mask as positional text with trailing blanks stripped.

    The four characters from mask_components() are joined and then stripped
    on the right, so blanks before the last set component are kept.
    """
    letters = mask_components(mask)
    return ''.join(letters).rstrip()
def reg_mask(mask):
    """Return the register swizzle suffix for mask, e.g. '.xz'.

    An empty or full mask (0 or 0xf) gives an empty string. That is cleaner
    output, but does not match MS's disassembler; the less clean alternative
    would be to return '' only for mask 0.
    """
    if mask in (0, 0xf):
        return ''
    return '.' + ''.join(c for c in mask_components(mask) if c != ' ')
def decode_sgn(buf, output, size=24):
    """Print the registers declared in a signature chunk body.

    buf:    chunk body; starts with two 32-bit little-endian words (register
            count and a value asserted to be 8), followed by one fixed-size
            record per register.
    output: truthy to print the registers as outputs ('dcl_output o#'),
            falsy to print them as inputs ('dcl_input v#').
    size:   record size in bytes: 24, 28 (adds a leading stream field, OSG5)
            or 32 (adds stream and a trailing min precision field - ISG1,
            OSG1, PSG1).

    All output goes through pr_verbose(); nothing is returned.

    Raises AssertionError on an unsupported record size or when a field holds
    a value this tool does not know about.
    """
    (num_regs, u1) = struct.unpack('<2I', buf[0:8])
    pr_verbose(' Registers: {}'.format(num_regs))
    pr_verbose(' Unknown 1: {:#x}'.format(u1))
    assert(u1 == 8)

    direction = 'output' if output else 'input'
    reg_prefix = 'o' if output else 'v'

    stream = min_precision = None
    for reg in range(num_regs):
        start = 8 + size * reg  # Is the 8 offset from u1?
        record = buf[start:start + size]
        if size == 24:
            fields = struct.unpack('<5I2BH', record)
        elif size == 28:  # OSG5
            stream, *fields = struct.unpack('<6I2BH', record)
        elif size == 32:  # ISG1, OSG1, PSG1
            stream, *fields, min_precision = struct.unpack('<6I2BHI', record)
        else:
            assert(False)  # BUG
        (semantic_off, index, sv, comp_type, reg_num, mask, used, u6) = fields

        # The semantic name is a NUL-terminated string located at
        # semantic_off within this same buffer.
        semantic = c_str(buf[semantic_off:])

        pr_verbose(' dcl_{} {}{}{} : {}{}'.format(
            direction, reg_prefix, reg_num, reg_mask(mask), semantic,
            index or ''), verbosity=0)  # WIP
        if stream is not None:
            pr_verbose(' | Stream: {}'.format(stream))
        pr_verbose(' | Semantic: {}'.format(semantic))
        pr_verbose(' | Index: {}'.format(index))

        # Not all semantics have an obvious system value, e.g. SV_Target uses
        # NONE, and SV_Position uses POS as an output from the VS and input to
        # the PS, but NONE when it's an input to the VS
        pr_verbose(' | System Value: {} ({})'.format(
            lookup(sv, system_values), sv))
        assert sv in system_values

        pr_verbose(' | Type: {}'.format(lookup(comp_type, types)))
        assert comp_type in types

        # The register number is not asserted to equal the record index:
        # that is too strict, as a register may be reused so long as the
        # masks do not overlap.
        pr_verbose(' | Register: {}'.format(reg_num))

        if min_precision is not None:
            pr_verbose(' |Min Precision: {}'.format(min_precision))

        # Mask / used is a bit funky - used is often blank in outputs,
        # sometimes not a subset of mask?
        pr_verbose(' | Mask: {} (0x{:x})'.format(mask_str(mask), mask),
                   verbosity=1)
        pr_verbose(' | Used: {} (0x{:x})'.format(mask_str(used), used),
                   verbosity=1)

        pr_verbose(' | Unknown 6: {:#x}'.format(u6))
        assert(u6 == 0)
def decode_isgn(buf):
    """Decode an ISGN chunk: input registers, 24-byte records."""
    return decode_sgn(buf, False, 24)


def decode_isg1(buf):
    """Decode an ISG1 chunk: input registers, 32-byte records."""
    return decode_sgn(buf, False, 32)


def decode_osgn(buf):
    """Decode an OSGN chunk: output registers, 24-byte records."""
    return decode_sgn(buf, True, 24)


def decode_osg1(buf):
    """Decode an OSG1 chunk: output registers, 32-byte records."""
    return decode_sgn(buf, True, 32)


def decode_osg5(buf):
    """Decode an OSG5 chunk: output registers, 28-byte records."""
    return decode_sgn(buf, True, 28)


def decode_pcsg(buf):
    """Decode a PCSG chunk: printed as outputs, 24-byte records."""
    return decode_sgn(buf, True, 24)


def decode_psg1(buf):
    """Decode a PSG1 chunk: printed as outputs, 32-byte records."""
    return decode_sgn(buf, True, 32)
# Program type code from the bytecode chunk header -> shader model prefix.
shader_types = dict(enumerate((
    'ps',  # 0
    'vs',  # 1
    'gs',  # 2
    'hs',  # 3
    'ds',  # 4
    'cs',  # 5
)))
def get_shader_model_section(buf, section):
    """Return the shader model string (e.g. 'ps_5_0') of a bytecode chunk.

    buf:     chunk body; its first two little-endian 16-bit words are the
             version (major in the bits above the low nibble, minor in the
             low nibble) and the program type code.
    section: chunk name; accepted but not used.

    The model string is also printed at verbosity 0.

    Raises KeyError if the program type code is not in shader_types.
    """
    version, type_code = struct.unpack('<2H', buf[:4])
    major, minor = divmod(version, 0x10)
    shader_model = '{}_{}_{}'.format(shader_types[type_code], major, minor)
    pr_verbose(' {}'.format(shader_model), verbosity=0)
    return shader_model
def get_shader_model_shdr(buf):
    """Return the shader model string for the body of a SHDR chunk."""
    model = get_shader_model_section(buf, 'SHDR')
    return model


def get_shader_model_shex(buf):
    """Return the shader model string for the body of a SHEX chunk."""
    model = get_shader_model_section(buf, 'SHEX')
    return model
# Chunks that carry the shader bytecode; their handlers return the shader
# model string.
shader_model_sections = {
    b'SHEX': get_shader_model_shex,
    b'SHDR': get_shader_model_shdr,
}

# Every chunk signature this tool knows how to decode, mapped to the handler
# that is called with the chunk body.
chunks = {
    # "Input signature"
    b'ISGN': decode_isgn,
    b'ISG1': decode_isg1,
    # "Output signature"
    b'OSGN': decode_osgn,
    b'OSG1': decode_osg1,
    b'OSG5': decode_osg5,
    # "Patch Constant signature", for domain shaders
    b'PCSG': decode_pcsg,
    b'PSG1': decode_psg1,
    # Bytecode chunks
    **shader_model_sections,
    # TODO: 'SHEX' / 'SHDR', maybe 'STAT', etc.
}
def get_chunk_info(stream, offset):
    """Seek to offset and read a chunk's 8-byte header.

    Returns (signature, size): the 4-byte chunk tag and the little-endian
    32-bit size that follows it. The stream is left positioned just past the
    header.
    """
    stream.seek(offset)
    chunk_header = stream.read(8)
    signature, size = struct.unpack('<4sI', chunk_header)
    return (signature, size)
# Chunk signatures whose contents are folded into the bytecode hash.
hash_sections = (
    # Bytecode
    b"SHDR", b"SHEX",
    # Input signature
    b"ISGN", b"ISG1",
    # Patch constant signature
    b"PCSG", b"PSG1",
    # Output signature
    b"OSGN", b"OSG5", b"OSG1",
)


def _calc_chunk_bytecode_hash(signature, buf, bytecode_hash):
    """Fold one chunk into the running bytecode hash.

    signature:     4-byte chunk tag.
    buf:           chunk body.
    bytecode_hash: CRC accumulated so far.

    Returns the updated CRC-32C when signature is listed in hash_sections,
    otherwise bytecode_hash unchanged.
    """
    if signature not in hash_sections:
        return bytecode_hash
    # crc32c is not available in Python's standard libraries yet, use crcmod.
    # It is imported here, so it is only needed when a chunk is hashed.
    import crcmod.predefined
    crc32c = crcmod.predefined.mkPredefinedCrcFun("crc-32c")
    return crc32c(buf, bytecode_hash)
def calc_chunk_bytecode_hash(stream, offset, bytecode_hash):
    """Read the chunk at offset and fold it into the running bytecode hash.

    Called from generic_shader_extractor.

    Returns the updated hash; it is unchanged when the chunk is not one of
    the hashed sections.
    """
    signature, size = get_chunk_info(stream, offset)
    body = stream.read(size)
    return _calc_chunk_bytecode_hash(signature, body, bytecode_hash)
def decode_chunk_at(stream, offset, bytecode_hash):
    """Print, decode and optionally hash the chunk found at offset.

    stream:        seekable binary stream holding the DXBC container.
    offset:        absolute position of the chunk header.
    bytecode_hash: running bytecode hash, or None when hashing is disabled.

    Returns the updated bytecode hash, or None when bytecode_hash is None.
    """
    signature, size = get_chunk_info(stream, offset)
    buf = stream.read(size)
    hashing = bytecode_hash is not None

    # Full detail at verbosity >= 1; otherwise just the chunk name, which is
    # also shown whenever hashing is on, even in quiet mode.
    if verbosity >= 1:
        print("{} chunk at 0x{:08x} size {}".format(signature.decode('ASCII'), offset, size))
    elif verbosity >= 0 or hashing:
        print('{}'.format(signature.decode('ASCII')))

    decoder = chunks.get(signature)
    if decoder is not None:
        decoder(buf)

    if not hashing:
        return None
    return _calc_chunk_bytecode_hash(signature, buf, bytecode_hash)
def check_chunk_for_shader_model(stream, offset):
    """Return the shader model string if the chunk at offset is bytecode.

    Returns None for any chunk whose signature is not listed in
    shader_model_sections; the chunk body is only read for bytecode chunks.
    """
    signature, size = get_chunk_info(stream, offset)
    handler = shader_model_sections.get(signature)
    if handler is None:
        return None
    return handler(stream.read(size))
def get_chunk(stream, name):
    """Return the body of the first chunk whose signature equals name.

    stream: binary stream positioned at the start of a DXBC container.
    name:   4-byte chunk signature, e.g. b'SHEX'.

    Returns the chunk body as bytes, or None if no chunk matches.
    """
    header = parse_dxbc_header(stream)
    for chunk_offset in get_chunk_offsets(stream, header):
        signature, size = get_chunk_info(stream, chunk_offset)
        if signature == name:
            return stream.read(size)
    return None
# MD5 per-round left-rotate amounts.
md5_s = [7, 12, 17, 22]*4 + [5, 9, 14, 20]*4 + [4, 11, 16, 23]*4 + [6, 10, 15, 21]*4
# MD5 round constants: the binary integer part of the sines of integers
# (radians). Both tables are built once at import time rather than per call.
md5_K = [ np.uint32(math.floor(2**32 * abs(math.sin(i)))) for i in range(1, 65) ]


def shader_hash(message, real_md5=False):
    '''
    Return the 32 hex digit hash of message, computed as for DX shaders.

    Follows the MD5 pseudocode from:
    https://en.wikipedia.org/wiki/Md5

    message:  bytes-like object to hash (it is copied, not modified).
    real_md5: if True, pad the final block as standard MD5 does, giving the
              same digest as hashlib.md5(). If False (the default), use a
              slight modification to the padding method to generate the same
              obfuscated MD5 hashes as d3dcompiler.
    '''
    message = bytearray(message)

    def leftrotate(x, c):
        # 32-bit rotate left; bits shifted beyond bit 31 are discarded.
        return np.uint32(x << c) | np.uint32(x >> (32-c))

    # Gotcha: the length that gets appended is in bits, not bytes.
    orig_len_bytes = len(message)
    orig_len_bits = orig_len_bytes * 8

    s = md5_s
    K = md5_K

    # Initial state. All of these are unsigned 32 bit values that wrap modulo
    # 2^32 during the calculation.
    a0 = np.uint32(0x67452301)  # A
    b0 = np.uint32(0xefcdab89)  # B
    c0 = np.uint32(0x98badcfe)  # C
    d0 = np.uint32(0x10325476)  # D

    # Pre-processing: append a single "1" bit. The input bytes are treated as
    # bit strings with the most significant bit first, hence 0x80.
    message.append(0x80)

    # Pre-processing: pad with zeros until the length in bits is congruent to
    # 448 (mod 512), i.e. 8 bytes short of a whole 64-byte block.
    pad = 64 - (len(message) % 64)
    if pad < 8:
        message.extend([0] * (64 + pad - 8))
    else:
        message.extend([0] * (pad - 8))

    # Append the length. The MS implementation differs from RSA MD5 only in
    # the way the size is used to pad the final block:
    #
    # - Real MD5 appends the original length in bits as a 64 bit
    #   little-endian value.
    # - The MS variant instead *inserts* the length in bits at the *start* of
    #   the final 512 bit block as a *32 bit* little-endian value, and puts a
    #   second *31 bit* size in *bytes* at the end of the block, shifted left
    #   with a final 1 added.
    #
    # The standard is clear enough about the padding that this is unlikely to
    # be an accident; it looks like intentional obfuscation.
    if real_md5:
        message.extend(struct.pack('<Q', orig_len_bits))
    else:
        message = message[:-56] + struct.pack('<I', orig_len_bits) + message[-56:]
        message.extend(struct.pack('<I', (orig_len_bytes << 1) | 1))
    assert(len(message) % 64 == 0)

    # The uint32 additions below are meant to wrap. Silence NumPy's overflow
    # warning only for the duration of this computation, so the caller's
    # error settings are left untouched.
    with np.errstate(over='ignore'):
        # Process the message in successive 512-bit chunks, each broken into
        # sixteen 32-bit little-endian words M[j], 0 <= j <= 15. Iterating in
        # place avoids re-copying the remaining message after every chunk.
        for M in struct.iter_unpack('<16I', message):
            # Initialize hash value for this chunk:
            A, B, C, D = a0, b0, c0, d0

            # Main loop:
            for i in range(64):
                if i < 16:
                    F = (B & C) | (~B & D)
                    g = i
                elif i < 32:
                    F = (D & B) | (~D & C)
                    g = (5*i + 1) % 16
                elif i < 48:
                    F = B ^ C ^ D
                    g = (3*i + 5) % 16
                else:
                    F = C ^ (B | ~D)
                    g = (7*i) % 16
                # The right-hand side is evaluated in full before any of the
                # names are rebound, so it sees the previous A, B, C and D.
                A, D, C, B = D, C, B, np.uint32(
                    B + leftrotate(np.uint32(A + F + K[i] + M[g]), s[i]))

            # Add this chunk's hash to result so far:
            a0 = np.uint32(a0 + A)
            b0 = np.uint32(b0 + B)
            c0 = np.uint32(c0 + C)
            d0 = np.uint32(d0 + D)

    # The digest is a0, b0, c0, d0 laid out little-endian; re-reading the
    # packed bytes as big-endian words prints them in digest byte order.
    return '%08x%08x%08x%08x' % struct.unpack('>4I', struct.pack('<4I', a0, b0, c0, d0))
def parse(stream):
    """Analyse one DXBC shader binary and print what was requested.

    stream: binary stream positioned at the start of the shader. It has to
            be seekable unless --3dmigoto-hash is given, in which case it is
            first read entirely into memory.

    Depending on the command line options held in the global args, this
    prints the 3DMigoto hash, the hash embedded in the header, the
    recomputed DXBC hash, every chunk, and the bytecode hash.
    """
    if getattr(args, '3dmigoto_hash'):
        # The whole file is needed for this hash; keep parsing from the
        # in-memory copy afterwards.
        stream = io.BytesIO(stream.read())
        migoto_hash = extract_unity_shaders.fnv_3Dmigoto_shader(stream.getbuffer())
        print('3DMigoto hash: %016x' % migoto_hash)

    header = parse_dxbc_header(stream)
    pr_verbose(header, verbosity=2)
    chunk_offsets = get_chunk_offsets(stream, header)

    embedded = codecs.encode(header.hash, 'hex').decode('ascii')
    pr_verbose('Embedded hash:', embedded)

    if args.hash:
        # Hash everything from byte 20 up to the size given in the header,
        # i.e. all that follows the 4-byte tag and the 16-byte embedded hash.
        stream.seek(20)
        print('Header size:', header.size)
        hashable = stream.read(header.size - 20)
        assert(len(hashable) + 20 == header.size)
        print(' DXBC hash:', shader_hash(hashable))

    # None disables hashing inside decode_chunk_at().
    bytecode_hash = 0 if args.bytecode_hash else None
    for chunk_offset in chunk_offsets:
        bytecode_hash = decode_chunk_at(stream, chunk_offset, bytecode_hash)

    if args.bytecode_hash:
        print('Bytecode hash: %08x' % bytecode_hash)
def parse_args():
    """Parse the command line into the module globals args and verbosity.

    verbosity ends up as the number of --verbose flags minus the number of
    --quiet flags.
    """
    global args, verbosity

    parser = argparse.ArgumentParser(description = 'DX11 Shader Binary Analysis Tool')
    parser.add_argument('files', nargs='+',
                        help='List of shader binary files to process')

    # Counted flags: each repetition raises / lowers the verbosity by one.
    counted_flags = (
        (('--verbose', '-v'), 'Level of verbosity'),
        (('--quiet', '-q'), 'Surpress informational messages'),
    )
    for names, help_text in counted_flags:
        parser.add_argument(*names, action='count', default=0, help=help_text)

    # Plain on/off switches selecting which hashes get calculated.
    switches = (
        ('--hash',
         'Calculate the obfuscated MD5-like hash used by DX shaders'),
        ('--bytecode-hash',
         'Calculate the bytecode+signature hash, e.g. use to correlate shaders that only differ by debug info, etc.'),
        ('--3dmigoto-hash',
         'Calculate the default hash used by 3DMigoto'),
    )
    for name, help_text in switches:
        parser.add_argument(name, action='store_true', help=help_text)

    args = parser.parse_args()
    verbosity = args.verbose - args.quiet
def main():
    """Parse the command line and analyse each listed shader file in turn.

    Returns None, which the entry point below hands to sys.exit().
    """
    parse_args()
    for filename in args.files:
        print('\nparsing {}...'.format(filename))
        # Close each file as soon as it has been parsed, instead of leaving
        # one open handle per input until the interpreter cleans them up.
        with open(filename, 'rb') as stream:
            parse(stream)


if __name__ == '__main__':
    sys.exit(main())
# vi: et sw=4:ts=4