Skip to content

Commit

Permalink
Merges in fix for #9
Browse files: browse the repository at this point in the history
  • Loading branch information
skoczen committed Feb 5, 2015
2 parents db49658 + 7e05f6c commit 072e0af
Show file tree
Hide file tree
Showing 13 changed files with 230 additions and 216 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ README.html

shelf.db
.idea/*
venv
15 changes: 5 additions & 10 deletions django_seo_js/backends/base.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,20 @@
import importlib
import requests
from django.conf import settings
from django.http import HttpResponse
from django_seo_js import settings


DEFAULT_BACKEND = "django_seo_js.backends.PrerenderIO"
IGNORED_HEADERS = [
IGNORED_HEADERS = frozenset((
'connection', 'keep-alive', 'proxy-authenticate',
'proxy-authorization', 'te', 'trailers', 'transfer-encoding',
'upgrade', 'content-length', 'content-encoding'
]
))


class SelectedBackend(object):

def __init__(self, *args, **kwargs):
if getattr(settings, "SEO_JS_BACKEND", None):
module_path = getattr(settings, "SEO_JS_BACKEND")
else:
module_path = DEFAULT_BACKEND

module_path = settings.BACKEND
backend_module = importlib.import_module(".".join(module_path.split(".")[:-1]))
self.backend = getattr(backend_module, module_path.split(".")[-1])()

Expand All @@ -46,7 +41,7 @@ class RequestsBasedBackend(object):

def __init__(self, *args, **kwargs):
super(RequestsBasedBackend, self).__init__(*args, **kwargs)
self.requests = requests
self.session = requests.Session()

def build_django_response_from_requests_response(self, response):
r = HttpResponse(response.content)
Expand Down
46 changes: 28 additions & 18 deletions django_seo_js/backends/prerender.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
from django.conf import settings
from django_seo_js import settings
from base import SEOBackendBase, RequestsBasedBackend


class PrerenderIO(SEOBackendBase, RequestsBasedBackend):
"""Implements the backend for prerender.io"""
BASE_URL = "http://service.prerender.io/"
RECACHE_URL = "http://api.prerender.io/recache"
BASE_URL = "https://service.prerender.io/"
RECACHE_URL = "https://api.prerender.io/recache"

def __init__(self, *args, **kwargs):
super(SEOBackendBase, self).__init__(*args, **kwargs)
self.token = self._get_token()

def _get_token(self):
if not getattr(settings, "SEO_JS_PRERENDER_TOKEN"):
if settings.PRERENDER_TOKEN is None:
raise ValueError("Missing SEO_JS_PRERENDER_TOKEN in settings.")
return settings.SEO_JS_PRERENDER_TOKEN
return settings.PRERENDER_TOKEN

def get_response_for_url(self, url):
"""
Expand All @@ -24,13 +24,13 @@ def get_response_for_url(self, url):

if not url or "//" not in url:
raise ValueError("Missing or invalid url: %s" % url)
render_url = "%s%s" % (self.BASE_URL, url)

render_url = self.BASE_URL + url
headers = {
'X-Prerender-Token': self.token,
'Accept-Encoding': 'gzip',
}
r = self.requests.get(render_url, headers=headers, allow_redirects=False)
assert int(r.status_code) < 500
r = self.session.get(render_url, headers=headers, allow_redirects=False)
assert r.status_code < 500

return self.build_django_response_from_requests_response(r)

Expand All @@ -45,19 +45,18 @@ def update_url(self, url=None, regex=None):

headers = {
'X-Prerender-Token': self.token,
'Content-type': 'application/json',
'Accept-Encoding': 'gzip',
'Content-Type': 'application/json',
}
data = {
'prerenderToken': settings.SEO_JS_PRERENDER_TOKEN,
'prerenderToken': settings.PRERENDER_TOKEN,
}
if url:
data["url"] = url
if regex:
data["regex"] = regex

r = self.requests.post(self.RECACHE_URL, headers=headers, data=data)
return int(r.status_code) < 500
r = self.session.post(self.RECACHE_URL, headers=headers, data=data)
return r.status_code < 500


class PrerenderHosted(PrerenderIO):
Expand All @@ -67,13 +66,24 @@ class PrerenderHosted(PrerenderIO):
def __init__(self, *args, **kwargs):
super(SEOBackendBase, self).__init__(*args, **kwargs)
self.token = ""
if not getattr(settings, "SEO_JS_PRERENDER_URL", None):
if not settings.PRERENDER_URL:
raise ValueError("Missing SEO_JS_PRERENDER_URL in settings.")
if not getattr(settings, "SEO_JS_PRERENDER_RECACHE_URL", None):
if not settings.PRERENDER_RECACHE_URL:
raise ValueError("Missing SEO_JS_PRERENDER_RECACHE_URL in settings.")

self.BASE_URL = getattr(settings, "SEO_JS_PRERENDER_URL")
self.RECACHE_URL = getattr(settings, "SEO_JS_PRERENDER_RECACHE_URL")
self.BASE_URL = settings.PRERENDER_URL
self.RECACHE_URL = settings.PRERENDER_RECACHE_URL

def _get_token(self):
pass

def update_url(self, url=None):
"""
Accepts a fully-qualified url.
Returns True if successful, False if not successful.
"""
if not url:
raise ValueError("Neither a url or regex was provided to update_url.")
post_url = "%s%s" % (self.BASE_URL, url)
r = self.requests.post(post_url)
return int(r.status_code) < 500
82 changes: 11 additions & 71 deletions django_seo_js/helpers.py
Original file line number Diff line number Diff line change
@@ -1,84 +1,24 @@
from django.conf import settings
from django_seo_js import settings
from django_seo_js.backends import SelectedBackend


DEFAULT_IGNORED_EXTENSIONS = [
".js",
".css",
".xml",
".less",
".png",
".jpg",
".jpeg",
".gif",
".pdf",
".doc",
".txt",
".ico",
".rss",
".zip",
".mp3",
".rar",
".exe",
".wmv",
".doc",
".avi",
".ppt",
".mpg",
".mpeg",
".tif",
".wav",
".mov",
".psd",
".ai",
".xls",
".mp4",
".m4a",
".swf",
".dat",
".dmg",
".iso",
".flv",
".m4v",
".torrent"
]


def update_cache_for_url(url):
if getattr(settings, "SEO_JS_ENABLED", not settings.DEBUG):
if settings.ENABLED:
selector = SelectedBackend()
return selector.backend.update_url(url)
return False


def request_should_be_ignored(request):
# TODO: move these to a central settings/default area ala appconf.
# Note it's tougher than it looks because of the override_settings
# magical injection in tests.
if getattr(settings, "SEO_JS_IGNORE_URLS", None):
IGNORE_URLS = settings.SEO_JS_IGNORE_URLS
else:
IGNORE_URLS = ["/sitemap.xml", ]

ignore = False
for url in IGNORE_URLS:
for url in settings.IGNORE_URLS:
if url in request.path:
ignore = True
break
return True

if not ignore:
if getattr(settings, "SEO_JS_IGNORE_EXTENSIONS", None) is not None:
IGNORED_EXTENSIONS = settings.SEO_JS_IGNORE_EXTENSIONS
else:
IGNORED_EXTENSIONS = DEFAULT_IGNORED_EXTENSIONS
extension = None
last_dot = request.path.rfind(".")
if last_dot != -1:
extension = request.path[last_dot:]
if extension:
for ext in IGNORED_EXTENSIONS:
if extension == ext:
ignore = True
break
extension = None
last_dot = request.path.rfind(".")
if last_dot == -1:
# No extension found
return False

return ignore
extension = request.path[last_dot:]
return extension and extension in settings.IGNORE_EXTENSIONS
26 changes: 17 additions & 9 deletions django_seo_js/middleware/hashbang.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
from django_seo_js import settings
from django_seo_js.backends import SelectedBackend
from django_seo_js.helpers import request_should_be_ignored
from django.conf import settings

import logging
logger = logging.getLogger(__name__)


class HashBangMiddleware(SelectedBackend):
def process_request(self, request):
if not request_should_be_ignored(request) and\
getattr(settings, "SEO_JS_ENABLED", not settings.DEBUG) and\
"_escaped_fragment_" in request.GET:
if not settings.ENABLED:
return

if request_should_be_ignored(request):
return

if "_escaped_fragment_" not in request.GET:
return

url = request.build_absolute_uri()
try:
return self.backend.get_response_for_url(url)
except:
pass
url = request.build_absolute_uri()
try:
return self.backend.get_response_for_url(url)
except Exception as e:
logger.exception(e)
59 changes: 21 additions & 38 deletions django_seo_js/middleware/useragent.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,34 @@
import re
from django.conf import settings
from django_seo_js import settings
from django_seo_js.backends import SelectedBackend
from django_seo_js.helpers import request_should_be_ignored


DEFAULT_SEO_JS_USER_AGENTS = [
"Googlebot",
"Yahoo",
"bingbot",
"Ask Jeeves",
"baiduspider",
"facebookexternalhit",
"twitterbot",
"rogerbot",
"linkedinbot",
"embedly",
"quoralink preview'",
"showyoubot",
"outbrain",
"pinterest",
"developersgoogle.com/+/web/snippet",
]
import logging
logger = logging.getLogger(__name__)


class UserAgentMiddleware(SelectedBackend):
def __init__(self, *args, **kwargs):
super(UserAgentMiddleware, self).__init__(*args, **kwargs)
if getattr(settings, "SEO_JS_USER_AGENTS", None):
agents = getattr(settings, "SEO_JS_USER_AGENTS")
else:
agents = DEFAULT_SEO_JS_USER_AGENTS
regex_str = "|".join(agents)
regex_str = "|".join(settings.USER_AGENTS)
regex_str = ".*?(%s)" % regex_str
self.USER_AGENT_REGEX = re.compile(regex_str, re.IGNORECASE)

def process_request(self, request):
# TODO: move to proper settings app pattern.
if (
not request_should_be_ignored(request) and
getattr(settings, "SEO_JS_ENABLED", not settings.DEBUG) and
"HTTP_USER_AGENT" in request.META and
self.USER_AGENT_REGEX.match(request.META["HTTP_USER_AGENT"])
):

url = request.build_absolute_uri()

try:
return self.backend.get_response_for_url(url)
except:
pass
if not request.ENABLED:
return

if request_should_be_ignored(request):
return

if "HTTP_USER_AGENT" not in request.META:
return

if not self.USER_AGENT_REGEX.match(request.META["HTTP_USER_AGENT"]):
return

url = request.build_absolute_uri()
try:
return self.backend.get_response_for_url(url)
except Exception as e:
logger.exception(e)
Loading

0 comments on commit 072e0af

Please sign in to comment.