Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a new InfoExtractor for XNXX webpages #398

Merged
merged 2 commits into from
Sep 27, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions youtube_dl/InfoExtractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2955,3 +2955,73 @@ def _real_extract(self, url):
}

return [info]

class XNXXIE(InfoExtractor):
"""Information extractor for xnxx.com"""

_VALID_URL = r'^http://video\.xnxx\.com/video([0-9]+)/(.*)'
IE_NAME = u'xnxx'
VIDEO_URL_RE = r'flv_url=(.*?)&'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'

def report_webpage(self, video_id):
"""Report information extraction"""
self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))

def report_extraction(self, video_id):
"""Report information extraction"""
self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))

def extract_video_url(self, webpage):
"Extract the url for the video from the webpage"

result = re.search(self.VIDEO_URL_RE, webpage)
if result is None:
self._downloader.trouble(u'ERROR: unable to extract video url')
return urllib.unquote(result.group(1).decode('utf-8'))

def extract_video_title(self, webpage):
"Extract the title for the video from the webpage"

result = re.search(self.VIDEO_TITLE_RE, webpage)
if result is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return result.group(1).decode('utf-8')

def extract_video_thumbnail(self, webpage):
"Extract the thumbnail for the video from the webpage"

result = re.search(self.VIDEO_THUMB_RE, webpage)
if result is None:
self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
return result.group(1).decode('utf-8')

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
video_id = mobj.group(1).decode('utf-8')

self.report_webpage(video_id)

# Get webpage content
try:
webpage = urllib2.urlopen(url).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err)
return

info = {'id': video_id,
'url': self.extract_video_url(webpage),
'uploader': None,
'upload_date': None,
'title': self.extract_video_title(webpage),
'ext': 'flv',
'format': 'flv',
'thumbnail': self.extract_video_thumbnail(webpage),
'description': None,
'player_url': None}

return [info]
1 change: 1 addition & 0 deletions youtube_dl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ def gen_extractors():
MixcloudIE(),
StanfordOpenClassroomIE(),
MTVIE(),
XNXXIE(),

GenericIE()
]
Expand Down