Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Failures fixing & pages parsing for usernames #2

Merged
merged 2 commits into from
Jul 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 23 additions & 35 deletions sherlock/resources/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,12 @@
"rank": 0,
"url": "https://cash.me/${}",
"urlMain": "https://cash.me/",
"request_head_only": false,
"username_claimed": "Jenny",
"username_unclaimed": "noonewouldeverusethis7"
"username_unclaimed": "noonewouldeverusethis7",
"errors": {
"Cash isn't available in your country yet.": "Access denied in your country, use tor/proxy"
}
},
"Cent": {
"errorMsg": "<title>Cent</title>",
Expand Down Expand Up @@ -520,7 +524,7 @@
"username_unclaimed": "noonewouldeverusethis7"
},
"Ebay": {
"errorMsg": "The User ID you entered was not found",
"errorMsg": "<!-- - --></i><p class=\"sm-md\">",
"errorType": "message",
"rank": 56,
"url": "https://www.ebay.com/usr/{}",
Expand Down Expand Up @@ -858,10 +862,10 @@
},
"ImageShack": {
"errorType": "response_url",
"errorUrl": "https://imageshack.us/",
"errorUrl": "https://imageshack.com/",
"rank": 42021,
"url": "https://imageshack.us/user/{}",
"urlMain": "https://imageshack.us/",
"url": "https://imageshack.com/user/{}",
"urlMain": "https://imageshack.com/",
"username_claimed": "blue",
"username_unclaimed": "noonewouldeverusethis7"
},
Expand Down Expand Up @@ -1019,15 +1023,6 @@
"username_claimed": "blue",
"username_unclaimed": "noonewouldeverusethis7"
},
"LiveLeak": {
"errorMsg": "channel not found",
"errorType": "message",
"rank": 3625,
"url": "https://www.liveleak.com/c/{}",
"urlMain": "https://www.liveleak.com/",
"username_claimed": "blue",
"username_unclaimed": "noonewouldeverusethis"
},
"Lobsters": {
"errorType": "status_code",
"rank": 152798,
Expand All @@ -1047,7 +1042,8 @@
"username_unclaimed": "noonewouldeverusethis7"
},
"Medium": {
"errorType": "status_code",
"errorMsg": "We couldn’t find this page.",
"errorType": "message",
"rank": 72,
"url": "https://medium.com/@{}",
"urlMain": "https://medium.com/",
Expand Down Expand Up @@ -1408,15 +1404,6 @@
"username_claimed": "blue",
"username_unclaimed": "noonewouldeverusethis7"
},
"Redsun.tf": {
"errorMsg": "The specified member cannot be found",
"errorType": "message",
"rank": 3796657,
"url": "https://forum.redsun.tf/members/?username={}",
"urlMain": "https://redsun.tf/",
"username_claimed": "dan",
"username_unclaimed": "noonewouldeverusethis"
},
"Repl.it": {
"errorMsg": "404",
"errorType": "message",
Expand Down Expand Up @@ -1591,8 +1578,12 @@
"rank": 87,
"url": "https://open.spotify.com/user/{}",
"urlMain": "https://open.spotify.com/",
"request_head_only": false,
"username_claimed": "blue",
"username_unclaimed": "noonewouldeverusethis7"
"username_unclaimed": "noonewouldeverusethis7",
"errors": {
"Spotify is currently not available in your country.": "Access denied in your country, use tor/proxy"
}
},
"Star Citizen": {
"errorType": "status_code",
Expand Down Expand Up @@ -1812,14 +1803,6 @@
"username_claimed": "jenny",
"username_unclaimed": "noonewouldeverusethis7"
},
"Viadeo": {
"errorType": "status_code",
"rank": 16796,
"url": "http://fr.viadeo.com/en/profile/{}",
"urlMain": "http://fr.viadeo.com/en/",
"username_claimed": "franck.patissier",
"username_unclaimed": "noonewouldeverusethis"
},
"Vimeo": {
"errorType": "status_code",
"rank": 169,
Expand Down Expand Up @@ -2431,7 +2414,11 @@
"url": "https://pvpru.com/board/member.php?username={}&tab=aboutme#aboutme",
"urlMain": "https://pvpru.com/",
"username_claimed": "blue",
"username_unclaimed": "noonewouldeverusethis7"
"request_head_only": false,
"username_unclaimed": "noonewouldeverusethis7",
"errors": {
"Access denied": "Cloudflare security protection detected"
}
},
"radio_echo_msk": {
"errorType": "status_code",
Expand Down Expand Up @@ -2568,7 +2555,8 @@
"username_unclaimed": "noonewouldeverusethis77777"
},
"kofi": {
"errorType": "status_code",
"errorMsg": "Make income from your art!",
"errorType": "message",
"rank": 89891,
"url": "https://ko-fi.com/{}",
"urlMain": "https://ko-fi.com",
Expand Down
51 changes: 41 additions & 10 deletions sherlock/sherlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"""

import csv
import json
import logging
import os
import platform
Expand All @@ -17,7 +18,7 @@
from time import monotonic

import requests
from socid_extractor import extract
from socid_extractor import parse, extract

from requests_futures.sessions import FuturesSession
from torrequest import TorRequest
Expand Down Expand Up @@ -298,6 +299,9 @@ def sherlock(username, site_data, query_notify,
# Get the expected error type
error_type = net_info["errorType"]

# Get the failure messages and comments
failure_errors = net_info.get("errors", {})

# Retrieve future and ensure it has finished
future = net_info["request_future"]
r, error_text, expection_text = get_response(request_future=future,
Expand All @@ -321,6 +325,17 @@ def sherlock(username, site_data, query_notify,
except:
response_text = ""


# Detect failures such as a country restriction
for text, comment in failure_errors.items():
if text in r.text:
error_context = "Some error"
error_text = comment
break

# TODO: return an error for captchas and other site-specific cases (e.g. CashMe)
# and mark all results as invalid

extracted_ids_data = ""

if ids_search and r:
Expand Down Expand Up @@ -509,22 +524,30 @@ def main():
action="store_true", dest="no_color", default=False,
help="Don't color terminal output"
)
parser.add_argument("--browse", "-b",
action="store_true", dest="browse", default=False,
help="Browse to all results on default bowser."
)
parser.add_argument("--ids", "-i",
action="store_true", dest="ids_search", default=False,
help="Make scan of pages for other usernames and recursive search by them."
)
parser.add_argument("--parse",
dest="parse_url", default='',
help="Parse page by URL and extract username and IDs to use for search."
)
parser.add_argument("username",
nargs='+', metavar='USERNAMES',
action="store",
help="One or more usernames to check with social networks."
)
parser.add_argument("--browse", "-b",
action="store_true", dest="browse", default=False,
help="Browse to all results on default bowser.")
parser.add_argument("--ids", "-i",
action="store_true", dest="ids_search", default=False,
help="Make scan of pages for other usernames and recursive search by them.")

args = parser.parse_args()
# Argument check

# Usernames initial list
usernames = args.username

# Argument check
# TODO regex check on args.proxy
if args.tor and (args.proxy is not None):
raise Exception("Tor and Proxy cannot be set at the same time.")
Expand All @@ -547,6 +570,16 @@ def main():
print("You can only use --output with a single username")
sys.exit(1)

if args.parse_url:
page, _ = parse(args.parse_url)
info = extract(page)
text = 'Extracted ID data from webpage: ' + ', '.join([f'{a}: {b}' for a,b in info.items()])
print(text)
for k, v in info.items():
if 'username' in k:
usernames.append(v)

usernames = [u for u in usernames if u not in ('-')]

#Create object with all information about sites we are aware of.
try:
Expand Down Expand Up @@ -599,8 +632,6 @@ def main():
print_found_only=args.print_found_only,
color=not args.no_color)

# Run report on all specified users.
usernames = [*args.username]
already_checked = set()

while usernames:
Expand Down