This repository has been archived by the owner on Sep 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy path2021-nitropdf_com.py
73 lines (61 loc) · 2.7 KB
/
2021-nitropdf_com.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from parsers import base
import collections
class Parse(base.Parser):
    """
    NitroPDF.com 2021 breach data parser

    Source File SHA-1: 50dbf53333ef77ef59cd170be4c33931e613b8d9  nitrocloud.tsv
    Good Lines: 76,856,990
    """

    name = "None"
    web = "www.nitropdf.com"
    year = "2021"

    # Zero-based positions of the columns extracted from each row
    # (see the header listing in ``row_format``).
    _EMAIL_COLUMN = 4
    _PASSWORD_COLUMN = 7
    # Lines with fewer tab-separated fields than this are skipped as malformed.
    # NOTE(review): the header below lists 30 columns; the threshold of 29 is
    # kept from the original implementation -- confirm it is intentional.
    _MIN_FIELDS = 29

    def row_format(self, r: str) -> tuple:
        """
        Convert one raw tab-separated line into an output record.

        Header: id bigint NOT NULL,
                tmp_admin boolean DEFAULT false,
                agreed boolean NOT NULL,
                created timestamp without time zone,
                email character varying(255) NOT NULL,
                firstname character varying(255),
                lastname character varying(255),
                password character varying(255),
                passwordreset character varying(255),
                verified boolean NOT NULL,
                avatar character varying(255),
                settings integer DEFAULT 0 NOT NULL,
                source character varying(255),
                notifications integer DEFAULT 0 NOT NULL,
                status character varying(255) DEFAULT 'ACTIVE'::character varying NOT NULL,
                secret character varying(255) DEFAULT '123abc'::character varying NOT NULL,
                confirmed_client_access boolean DEFAULT false NOT NULL,
                account_id bigint NOT NULL,
                timezone character varying(255),
                dateformat character varying(255),
                verify_remind timestamp(6) without time zone,
                desktop_version character varying(255),
                locale character varying(10),
                prompts integer DEFAULT 0 NOT NULL,
                title character varying(255),
                company character varying(255),
                sem_id bigint,
                updated_at timestamp without time zone,
                tos_pp_accepted_at timestamp without time zone,
                remote_ip character varying(50)

        Table Name: users.user_credential

        Output order: name,website,year,domain,email,password,hash,salt

        The ``password`` column of the dump is emitted in the ``hash`` slot;
        the ``password`` and ``salt`` slots are always empty strings.

        :param r: one tab-separated line of the dump
        :return: ``(name, web, year, domain, email, '', pw_hash, '')`` where
                 ``year`` is an ``int`` and ``domain`` is ``''`` when the
                 email contains no ``@``
        :raises IndexError: if the line has too few fields to contain the
                            password column
        """
        row = r.split('\t')
        # Single quotes are removed and surrounding whitespace trimmed.
        email = row[self._EMAIL_COLUMN].replace('\'', '').strip()
        pw_hash = row[self._PASSWORD_COLUMN].replace('\'', '').strip()
        # The domain is whatever follows the LAST '@'; splitting on the first
        # one would return a fragment for addresses containing several '@'.
        _, at_sign, domain = email.rpartition('@')
        if not at_sign:
            domain = ''
        return self.name, self.web, int(self.year), domain, email, '', pw_hash, ''

    def process_rows(self) -> collections.abc.Iterable[tuple]:
        """
        Lazily yield one formatted record per well-formed line of the source.

        The file at ``self.source`` is read as UTF-8 with undecodable bytes
        dropped (``errors='ignore'``). Lines with fewer than ``_MIN_FIELDS``
        tab-separated fields are skipped silently.

        :return: generator of tuples as produced by ``row_format``
        """
        # presumably self.source is a path set up by base.Parser -- verify there
        with open(self.source, 'r', encoding='utf-8', errors='ignore') as source:
            for line in source:
                # Number of fields == number of tabs + 1; counting avoids
                # building a throwaway list that row_format would rebuild.
                if line.count('\t') + 1 < self._MIN_FIELDS:
                    continue
                yield self.row_format(line)