-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregistry_processor.py
153 lines (128 loc) · 6.87 KB
/
registry_processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# registry_processor.py
import os
import re
import yaml
from zipfile import ZipFile, is_zipfile
from py7zr import SevenZipFile, is_7zfile
import logging
from file_operations import compute_sha1
def _strip_enclosing_quotes(text):
    """Remove one pair of enclosing double quotes from *text*, if present."""
    text = text.strip()
    if len(text) >= 2 and text[0] == '"' and text[-1] == '"':
        # Slice instead of str.strip('"') so that an escaped quote at the very
        # end of the payload (e.g. ...\"%1\"") is not eaten as well.
        return text[1:-1]
    return text.strip('"')


def reg_to_yaml(reg_content):
    """Parse the text of a ``.reg`` export into a nested dictionary.

    The result maps the first path component of every ``[key]`` header to a
    dictionary of sub-key paths, and every sub-key path to a dictionary of
    value names.  Sub-key paths are stored wrapped in double quotes with each
    backslash doubled.  Every value is a one-item dictionary keyed by type:

    * ``dword:XXXXXXXX`` -> ``{"dword": int}`` (payload parsed as base 16)
    * ``hex:aa,bb,...``  -> ``{"binary": [int, ...]}``; lines ending in a
      backslash are joined with the following line(s)
    * ``sz:"text"``      -> ``{"sz": "text"}``
    * ``"text"`` or an untyped bare token -> ``{"sz": "text"}``

    Values with any other type prefix (for example ``hex(2):``) are skipped.
    Lines with an ``=`` that appear before the first ``[key]`` header are
    ignored.  The name/value split happens at the first ``=`` on the line, so
    a value *name* that itself contains ``=`` is not supported.

    Args:
        reg_content: Full text of the ``.reg`` file.

    Returns:
        dict: ``{root: {quoted_sub_key: {value_name: {type: value}}}}``.

    Raises:
        ValueError: If a ``dword`` or ``hex`` payload is not valid hexadecimal.
    """
    lines = reg_content.splitlines()
    total = len(lines)
    reg_entries = {}
    # Dictionary receiving the values of the key currently being parsed;
    # stays None until the first "[key]" header has been seen.
    current_values = None

    # A manual cursor (rather than a for-loop) lets the hex branch consume
    # continuation lines.
    index = 0
    while index < total:
        line = lines[index]
        index += 1

        if line.startswith("["):
            current_key = line.rstrip().strip("[]")
            root, *sub_parts = current_key.split("\\")
            quoted_sub_key = '"' + "\\\\".join(sub_parts) + '"'
            # A repeated header starts over with an empty value dictionary.
            current_values = {}
            reg_entries.setdefault(root, {})[quoted_sub_key] = current_values
            continue

        if "=" not in line or current_values is None:
            continue

        # Split once only: the value itself may legitimately contain "=".
        raw_name, value_part = re.split(r"\s*=\s*", line, maxsplit=1)
        name = _strip_enclosing_quotes(raw_name)

        # A quoted payload is always a string, even when it contains ":"
        # (every drive path does); it must not be mistaken for "type:value".
        if value_part.startswith('"') or ":" not in value_part:
            current_values[name] = {"sz": _strip_enclosing_quotes(value_part)}
            continue

        value_type, value = (piece.strip() for piece in value_part.split(":", 1))

        if value_type == "dword":
            current_values[name] = {value_type: int(value, 16)}
        elif value_type == "hex":
            # A trailing backslash means the byte list continues on the next
            # line, possibly several times, e.g.
            #   hex:36,37,37,30,\
            #     34,30,39,34,\
            #     36,33,30,00
            while value.endswith("\\") and index < total:
                value = value.rstrip("\\").rstrip() + lines[index].strip()
                index += 1
            # Drop a dangling continuation marker at end of input.
            value = value.rstrip("\\")
            current_values[name] = {
                "binary": [int(octet, 16) for octet in value.split(",") if octet.strip()]
            }
        elif value_type == "sz":
            current_values[name] = {value_type: _strip_enclosing_quotes(value)}
        # Any other value type is intentionally skipped.

    return reg_entries
def yaml_to_file(reg_entries, output_folder_path: str):
    """Write *reg_entries* to ``registry.yaml`` inside *output_folder_path*.

    The folder is created when it does not exist.  The data is rendered with
    ``yaml.dump`` in block style with an explicit ``---`` document start, every
    single-quote character is removed from the rendered text, and the result
    is written to the file in one pass.

    Args:
        reg_entries: Data to serialise (as produced by ``reg_to_yaml``).
        output_folder_path: Directory that receives ``registry.yaml``.
    """
    os.makedirs(output_folder_path, exist_ok=True)
    file_path = os.path.join(output_folder_path, "registry.yaml")

    # Render to a string first so the post-processing below happens in memory;
    # the file is only touched once, and not at all if rendering fails.
    rendered = yaml.dump(reg_entries, default_flow_style=False, explicit_start=True)

    # NOTE(review): this removes *every* single quote, including apostrophes
    # inside values and quoting that the YAML emitter added because a scalar
    # needed it. Kept as-is because the exact file content is hashed and read
    # downstream -- confirm with the consumer before tightening.
    modified_content = rendered.replace('\'', '')

    with open(file_path, "w") as file:
        file.write(modified_content)
def _decode_reg_bytes(raw_bytes):
    """Decode the raw bytes of a ``.reg`` file, honouring a byte-order mark.

    Data starting with a UTF-16 BOM is decoded as UTF-16, data starting with a
    UTF-8 BOM is decoded as UTF-8 with the BOM removed, and anything else is
    decoded as plain UTF-8.
    """
    if raw_bytes.startswith((b"\xff\xfe", b"\xfe\xff")):
        return raw_bytes.decode("utf-16")
    if raw_bytes.startswith(b"\xef\xbb\xbf"):
        return raw_bytes.decode("utf-8-sig")
    return raw_bytes.decode("utf-8")


def convert_registry_to_yaml(gsm_gsba_file_path: str, output_folder_path: str):
    """Extract the ``.reg`` file from a ``.gsba`` archive and save it as YAML.

    The archive may be a 7z or a zip container.  The text of the ``.reg``
    member is parsed with ``reg_to_yaml`` and written to ``registry.yaml`` in
    *output_folder_path* via ``yaml_to_file``.  When the archive holds several
    ``.reg`` members, the last one encountered is used.

    Args:
        gsm_gsba_file_path: Path of the archive; must end in ``.gsba``.
        output_folder_path: Directory that receives ``registry.yaml``.

    Returns:
        The value returned by ``compute_sha1`` for the written
        ``registry.yaml``, or ``None`` when the path does not end in
        ``.gsba``, the file is neither 7z nor zip, no ``.reg`` data could be
        read, or the conversion failed.  Failures are logged, not raised.
    """
    file_data = None
    output_folder_path = os.path.abspath(output_folder_path)

    if not gsm_gsba_file_path.endswith('.gsba'):
        logging.error(f"File {gsm_gsba_file_path} does not end in .gsba")
        return None

    if is_7zfile(gsm_gsba_file_path):
        try:
            with SevenZipFile(gsm_gsba_file_path, mode='r') as z:
                extracted_files = z.read()
            # Each extracted member is an in-memory buffer; take its bytes.
            for member_name, member_buffer in extracted_files.items():
                if member_name.endswith('.reg'):
                    file_data = _decode_reg_bytes(member_buffer.getvalue())
        except Exception as e:
            logging.error(f"Error with SevenZipFile: {e}. Probably the .gsba file is either corrupted or of zip format instead of 7z.")
    elif is_zipfile(gsm_gsba_file_path):
        try:
            with ZipFile(gsm_gsba_file_path, 'r') as zip_ref:
                for member_name in zip_ref.namelist():
                    if member_name.endswith('.reg'):
                        file_data = _decode_reg_bytes(zip_ref.read(member_name))
        except Exception as e:
            logging.error(f"Error with ZipFile: {e}")
    else:
        logging.error(f"File {gsm_gsba_file_path} is not a .7z or .zip file, at heart. The .gsba file might be corrupted.")
        return None

    if file_data is None:
        logging.error("No registry data found in the archive.")
        return None

    try:
        reg_entries = reg_to_yaml(file_data)
        yaml_to_file(reg_entries, output_folder_path)
        # Hash the YAML that was just written so callers can fingerprint it.
        return compute_sha1(os.path.join(output_folder_path, "registry.yaml"))
    except Exception as e:
        logging.error(f"Error in converting registry to YAML: {e}")
        return None