This repository has been archived by the owner on Sep 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy path2022-ggcorp_me.py
81 lines (67 loc) · 3.08 KB
/
2022-ggcorp_me.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from parsers import base
import collections
class Parse(base.Parser):
    """
    GGCorp.me breach data parser
    Source File SHA-1:
    Good Lines: 2,375,614

    Reads a MySQL dump line by line, picks the ``INSERT`` statements for the
    ``Users`` table and turns every value tuple into one output record.
    """

    # NOTE(review): this is the string "None", not the ``None`` object;
    # kept as-is because the base class contract is not visible here.
    name = "None"
    web = "ggcorp.me"
    year = "2022"

    # Table that holds the account rows (see the schema in ``row_format``).
    # NOTE(review): taken from the documented "Table Name"; confirm the exact
    # spelling against the dump (the match below ignores letter case).
    _TABLE = "Users"
    # Zero-based positions of the columns we extract, per the same schema.
    _EMAIL_COLUMN = 2
    _PASSWORD_COLUMN = 16
    # MySQL backslash escape sequences that do not stand for the escaped
    # character itself; any other escaped character is taken literally.
    _ESCAPES = {"0": "\0", "b": "\b", "n": "\n", "r": "\r", "t": "\t", "Z": "\x1a"}

    def row_format(self, r: str) -> tuple:
        """
        Convert one SQL value tuple into an output record.

        Header: `uid` int(10) NOT NULL AUTO_INCREMENT,
        `username` varchar(100) DEFAULT NULL,
        `email` varchar(255) DEFAULT NULL,
        `name` varchar(500) DEFAULT NULL,
        `mobile_number` varchar(20) DEFAULT NULL,
        `country_code` varchar(3) NOT NULL DEFAULT 'US',
        `fbid` varchar(255) NOT NULL,
        `access_token` text NOT NULL,
        `auth_token` varchar(100) NOT NULL,
        `ip` varchar(30) NOT NULL,
        `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
        `twitter_id` varchar(255) DEFAULT '0',
        `twitter_access_token` text,
        `twitter_access_secret` text,
        `gender` varchar(10) DEFAULT NULL,
        `date_of_birth` date NOT NULL DEFAULT '0000-00-00',
        `password` varchar(200) DEFAULT NULL,
        `image` varchar(1000) DEFAULT NULL,
        `follower_count` int(45) DEFAULT '0',
        `device_token` text,
        `android_device_token` text,
        `is_admin` int(1) DEFAULT '0',
        `timezone` varchar(100) NOT NULL DEFAULT 'America/Los_Angeles',
        `displaying_post_date` date DEFAULT NULL,
        `is_device_active` tinyint(1) NOT NULL DEFAULT '0',
        `shared_for_date` varchar(15) NOT NULL DEFAULT '0000-00-00' COMMENT 'YYYY-MM-DD-SESSION',
        `show_second_session_date` date NOT NULL DEFAULT '0000-00-00',
        `apple_idfa` text,
        `google_advertiser_id` text,
        `stickers_left` int(10) NOT NULL DEFAULT '5',
        `deleted_at` timestamp NULL DEFAULT NULL,
        `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
        Table Name: Users

        Output layout: name,website,year,domain,email,password,hash,salt

        :param r: text of a single value tuple (the part between the
            parentheses), fields separated by commas, strings single-quoted
        :return: 8-tuple in the output layout above; ``email`` comes from
            column 2 and ``hash`` from column 16 (``password``) of the schema.
            The plain ``password`` and ``salt`` slots are always empty. A
            missing or NULL column is returned as an empty string.
        """
        fields = self._split_fields(r)
        email = self._column(fields, self._EMAIL_COLUMN)
        pw_hash = self._column(fields, self._PASSWORD_COLUMN)
        domain = email.split('@')[1] if '@' in email else ''
        return self.name, self.web, int(self.year), domain, email, '', pw_hash, ''

    def process_rows(self) -> collections.abc.Iterable[tuple]:
        """
        Yield one formatted record per value tuple found in ``self.source``.

        Only lines that start with an ``INSERT INTO`` statement for the
        ``Users`` table are considered; every other line of the dump is
        skipped. Undecodable bytes in the file are ignored.

        :return: iterator of tuples as produced by ``row_format``
        """
        prefix = ("INSERT INTO `%s`" % self._TABLE).casefold()
        with open(self.source, 'r', encoding='utf-8', errors='ignore') as source:
            for row in source:
                if row[:len(prefix)].casefold() != prefix:
                    continue
                # Cut at the first VALUES keyword only: the same text may
                # legitimately occur again inside the row data.
                _, keyword, values = row.partition('VALUES')
                if not keyword:
                    continue
                for value_tuple in self._iter_tuples(values):
                    yield self.row_format(value_tuple)

    @staticmethod
    def _iter_tuples(values: str) -> collections.abc.Iterator[str]:
        """Yield the text inside each top-level ``( ... )`` group of a VALUES clause.

        Parentheses and commas inside single-quoted strings are ignored, and a
        backslash inside a string escapes the following character. A trailing
        group that is never closed (truncated line) is not yielded.
        """
        depth = 0
        in_quote = False
        escaped = False
        start = 0
        for pos, ch in enumerate(values):
            if escaped:
                escaped = False
            elif in_quote:
                if ch == '\\':
                    escaped = True
                elif ch == "'":
                    in_quote = False
            elif ch == "'":
                in_quote = True
            elif ch == '(':
                if depth == 0:
                    start = pos + 1
                depth += 1
            elif ch == ')' and depth > 0:
                depth -= 1
                if depth == 0:
                    yield values[start:pos]

    @classmethod
    def _split_fields(cls, r: str) -> list:
        """Split one value tuple into its fields, honouring SQL string quoting.

        Quote characters are removed, backslash escapes inside strings are
        decoded, and an unquoted ``NULL`` becomes ``None``.
        """
        fields = []
        buf = []
        in_quote = False
        was_quoted = False
        escaped = False
        for ch in r:
            if escaped:
                buf.append(cls._ESCAPES.get(ch, ch))
                escaped = False
            elif in_quote and ch == '\\':
                escaped = True
            elif ch == "'":
                in_quote = not in_quote
                was_quoted = True
            elif ch == ',' and not in_quote:
                fields.append(cls._finish_field(buf, was_quoted))
                buf = []
                was_quoted = False
            else:
                buf.append(ch)
        fields.append(cls._finish_field(buf, was_quoted))
        return fields

    @staticmethod
    def _finish_field(buf: list, was_quoted: bool):
        """Join the collected characters; map an unquoted NULL to ``None``."""
        text = ''.join(buf)
        if not was_quoted and text.strip().upper() == 'NULL':
            return None
        return text

    @staticmethod
    def _column(fields: list, index: int) -> str:
        """Return the stripped field at *index*, or '' when absent or NULL."""
        value = fields[index] if index < len(fields) else None
        return value.strip() if value is not None else ''