# ======================================================================
# performers/siteS3xusPerformer.py  (new file in this patch)
# ======================================================================
import re

import scrapy

from tpdb.BasePerformerScraper import BasePerformerScraper


class SiteS3xusPerformerSpider(BasePerformerScraper):
    """Performer scraper for s3xus.com.

    XPath selectors read the model bio page; ``get_height`` / ``get_weight``
    normalize the site's imperial strings into the metric forms ("NNNcm",
    "NNkg") the TPDB pipeline expects.
    """

    selector_map = {
        'name': '//h1/text()',
        'image': '//meta[@property="og:image"]/@content',
        'image_blob': '//meta[@property="og:image"]/@content',
        'bio': '//meta[@property="og:description"]/@content',
        'birthday': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Birthdate")]/../p/text()',
        'birthplace': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Born")]/../p/text()',
        'eyecolor': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Eyes")]/../p/text()',
        'haircolor': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Hair")]/../p/text()',
        'height': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Height")]/../p/text()',
        'measurements': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Measurements")]/../p/text()',
        'weight': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Weight")]/../p/text()',

        'pagination': '/models?page=%s&order_by=publish_dates&sort_by=desc',
        'external_id': r'models/(.*)/'
    }

    name = 'S3xusPerformer'

    start_urls = [
        'https://www.s3xus.com',
    ]

    def get_gender(self, response):
        # No gender field on the page; site appears to list female
        # performers only — NOTE(review): confirm against the site.
        return 'Female'

    def get_performers(self, response):
        # Each model card links to that performer's bio page.
        performers = response.xpath('//div[@class="model-card"]/a/@href').getall()
        for performer in performers:
            yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers)

    def get_height(self, response):
        """Convert a feet/inches height string (e.g. ``5'7"``) to "NNNcm".

        Returns '' when no height selector matches or no digits are found.
        """
        if 'height' in self.selector_map:
            height = self.process_xpath(response, self.get_selector_map('height')).get()
            if height:
                parts = re.findall(r'(\d{1,2})', height)
                if parts:
                    feet = int(parts[0])
                    # The inches component is optional (e.g. a plain "6'").
                    inches = int(parts[1]) if len(parts) > 1 else 0
                    return (str(round((feet * 12 + inches) * 2.54)) + "cm").strip()
        return ''

    def get_weight(self, response):
        """Convert a pounds weight string (e.g. "120 lbs") to "NNkg".

        Returns '' when no weight selector matches, the text has no
        leading digits, or the parsed weight is zero.
        """
        if 'weight' in self.selector_map:
            weight = self.process_xpath(response, self.get_selector_map('weight')).get()
            if weight and re.match(r'\d+', weight):
                pounds = int(re.search(r'(\d+)', weight).group(1))
                if pounds:
                    return str(round(pounds * .453592)) + "kg"
                # BUG FIX: the original fell through to ``weight.strip()``
                # while ``weight`` was an int (AttributeError for a "0"
                # weight); a zero weight now just yields ''.
        return ''

    def get_measurements(self, response):
        """Return measurements like "34C-24-36" (stripped), or ''."""
        if 'measurements' in self.selector_map:
            measurements = self.process_xpath(response, self.get_selector_map('measurements')).get()
            match = re.search(r'(\d+\w+-\d+-\d+)', measurements) if measurements else None
            if match:
                return match.group(1).strip()
        return ''


# ======================================================================
# scenes/siteS3xus.py  (new file in this patch)
# ======================================================================
from tpdb.BaseSceneScraper import BaseSceneScraper


class SiteS3xusSpider(BaseSceneScraper):
    """Scene scraper for s3xus.com.

    NOTE(review): ``start_urls`` here omits the ``www.`` prefix used by the
    performer spider above — confirm both resolve to the same host.
    """

    name = 'S3xus'

    start_urls = [
        'https://s3xus.com/',
    ]

    selector_map = {
        'title': '//h1/text()',
        'description': '//meta[@property="og:description"]/@content',
        'date': '//ul[@class="info-wrapper"]/li[3]/span/text()',
        'date_formats': ["%b %d, %Y"],
        'image': '//meta[@property="og:image"]/@content',
        'image_blob': '//meta[@property="og:image"]/@content',
        'performers': '//div[@class="model-thumb"]/a/img/@alt',
        'tags': '//div[@class="tag-name"]/a/text()',
        'duration': '//ul[@class="info-wrapper"]/li[1]/span/text()',
        'external_id': r'scenes/(.+)',
        'trailer': '',
        'pagination': '/scenes?page=%s&order_by=publish_date&sort_by=desc'
    }

    def get_scenes(self, response):
        # Scene cards; only follow links matching the external-id pattern
        # so nav/promo links in the same card markup are skipped.
        scenes = response.xpath('//div[@class="card"]/a/@href').getall()
        for scene in scenes:
            if re.search(self.get_selector_map('external_id'), scene):
                yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene)