ThePornDatabase · Darklyter · Oct 30, 2023 · Oct 30, 2023 · Oct 30, 2023 · Oct 30, 2023
diff --git a/performers/siteS3xusPerformer.py b/performers/siteS3xusPerformer.py
@@ -0,0 +1,100 @@
+import re
+import scrapy
+
+from tpdb.BasePerformerScraper import BasePerformerScraper
+
+
+class SiteS3xusPerformerSpider(BasePerformerScraper):
+    """Performer spider for s3xus.com.
+
+    ``get_performers`` requests every model card found on a listing page,
+    and the profile fields are read through the expressions in
+    ``selector_map``. Height and weight are converted to metric strings;
+    see ``get_height`` and ``get_weight``.
+    """
+
+    # Field name -> XPath (a regex for 'external_id', a URL template for
+    # 'pagination'). Physical attributes are read from the "model-spec"
+    # list, where each <li> holds an <h3> label and a <p> value.
+    selector_map = {
+        'name': '//h1/text()',
+        'image': '//meta[@property="og:image"]/@content',
+        'image_blob': '//meta[@property="og:image"]/@content',
+        'bio': '//meta[@property="og:description"]/@content',
+        'birthday': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Birthdate")]/../p/text()',
+        'birthplace': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Born")]/../p/text()',
+        'eyecolor': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Eyes")]/../p/text()',
+        'haircolor': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Hair")]/../p/text()',
+        'height': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Height")]/../p/text()',
+        'measurements': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Measurements")]/../p/text()',
+        'weight': '//div[contains(@class, "model-spec")]/ul/li/h3[contains(text(), "Weight")]/../p/text()',
+
+        # NOTE(review): the companion scene spider (scenes/siteS3xus.py) uses
+        # order_by=publish_date (singular); confirm "publish_dates" is intended.
+        'pagination': '/models?page=%s&order_by=publish_dates&sort_by=desc',
+        'external_id': r'models/(.*)/'
+    }
+
+    name = 'S3xusPerformer'
+
+    start_urls = [
+        'https://www.s3xus.com',
+    ]
+
+    def get_gender(self, response):
+        """Return 'Female' for every performer; the page is not inspected."""
+        return 'Female'
+
+    def get_performers(self, response):
+        """Yield one request per model card linked from a listing page."""
+        for href in response.xpath('//div[@class="model-card"]/a/@href').getall():
+            yield scrapy.Request(url=self.format_link(response, href), callback=self.parse_performer, cookies=self.cookies, headers=self.headers)
+
+    def get_height(self, response):
+        """Return the height as '<n>cm', or '' when nothing can be parsed.
+
+        The first one- or two-digit number in the text is taken as feet and
+        the second, if present, as inches.
+        """
+        if 'height' not in self.selector_map:
+            return ''
+        height = self.process_xpath(response, self.get_selector_map('height')).get()
+        if not height:
+            return ''
+        parts = re.findall(r'(\d{1,2})', height)
+        if not parts:
+            return ''
+        feet = int(parts[0])
+        inches = int(parts[1]) if len(parts) > 1 else 0
+        return str(round(((feet * 12) + inches) * 2.54)) + "cm"
+
+    def get_weight(self, response):
+        """Return the weight as '<n>kg', or '' when it is missing or zero.
+
+        The leading integer of the text is treated as pounds and converted
+        to kilograms.
+        """
+        if 'weight' not in self.selector_map:
+            return ''
+        weight = self.process_xpath(response, self.get_selector_map('weight')).get()
+        if not weight:
+            return ''
+        match = re.match(r'\s*(\d+)', weight)
+        if not match:
+            return ''
+        pounds = int(match.group(1))
+        if not pounds:
+            # A zero weight carries no information; report it as missing
+            # rather than calling ``.strip()`` on an int.
+            return ''
+        return str(round(pounds * .453592)) + "kg"
+
+    def get_measurements(self, response):
+        """Return the first 'NNx-NN-NN' style measurement string, or ''."""
+        if 'measurements' not in self.selector_map:
+            return ''
+        measurements = self.process_xpath(response, self.get_selector_map('measurements')).get()
+        if not measurements:
+            return ''
+        match = re.search(r'(\d+\w+-\d+-\d+)', measurements)
+        return match.group(1).strip() if match else ''
diff --git a/scenes/siteS3xus.py b/scenes/siteS3xus.py
@@ -0,0 +1,44 @@
+import re
+import scrapy
+
+from tpdb.BaseSceneScraper import BaseSceneScraper
+
+
+class SiteS3xusSpider(BaseSceneScraper):
+    """Scene spider for s3xus.com.
+
+    Scene fields are read through the expressions in ``selector_map``;
+    ``get_scenes`` requests each scene linked from a listing page.
+    """
+
+    name = 'S3xus'
+
+    start_urls = [
+        'https://s3xus.com/',
+    ]
+
+    # Field name -> XPath, except: 'date_formats' (strptime-style formats),
+    # 'external_id' (regex applied to scene URLs) and 'pagination' (listing
+    # URL template). 'trailer' is left empty.
+    selector_map = {
+        'title': '//h1/text()',
+        'description': '//meta[@property="og:description"]/@content',
+        'date': '//ul[@class="info-wrapper"]/li[3]/span/text()',
+        'date_formats': ["%b %d, %Y"],
+        'image': '//meta[@property="og:image"]/@content',
+        'image_blob': '//meta[@property="og:image"]/@content',
+        'performers': '//div[@class="model-thumb"]/a/img/@alt',
+        'tags': '//div[@class="tag-name"]/a/text()',
+        'duration': '//ul[@class="info-wrapper"]/li[1]/span/text()',
+        'external_id': r'scenes/(.+)',
+        'trailer': '',
+        'pagination': '/scenes?page=%s&order_by=publish_date&sort_by=desc'
+    }
+
+    def get_scenes(self, response):
+        """Yield a request for every card link that matches 'external_id'."""
+        hrefs = response.xpath('//div[@class="card"]/a/@href').getall()
+        for href in hrefs:
+            if not re.search(self.get_selector_map('external_id'), href):
+                continue
+            yield scrapy.Request(url=self.format_link(response, href), callback=self.parse_scene)