Skip to content

Commit

Permalink
Merge branch 'master' into google-inspectiontool
Browse files Browse the repository at this point in the history
  • Loading branch information
lbarthon authored Jan 22, 2025
2 parents 88ce30e + d8a6f55 commit a3cc442
Show file tree
Hide file tree
Showing 4 changed files with 350 additions and 29 deletions.
142 changes: 116 additions & 26 deletions regexes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,11 @@ user_agent_parsers:
family_replacement: 'Pinterestbot'

# Bots
- regex: '(CSimpleSpider|Cityreview Robot|CrawlDaddy|CrawlFire|Finderbots|Index crawler|Job Roboter|KiwiStatus Spider|Lijit Crawler|QuerySeekerSpider|ScollSpider|Trends Crawler|USyd-NLP-Spider|SiteCat Webbot|BotName\/\$BotVersion|123metaspider-Bot|1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]{1,30}-Agent|AdsBot-Google(?:-[a-z]{1,30}|)|altavista|AppEngine-Google|archive.{0,30}\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|BingPreview|blitzbot|BlogBridge|Bloglovin|BoardReader Blog Indexer|BoardReader Favicon Fetcher|boitho.com-dc|BotSeer|BUbiNG|\b\w{0,30}favicon\w{0,30}\b|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher|)|Feed Seeker Bot|Feedbin|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|)|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile|)|IconSurf|IlTrovatore(?:-Setaccio|)|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]{1,30}Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masscan|masidani_bot|Mediapartners-Google|Microsoft .{0,30} Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media {0,2}|)|msrbot|Mtps Feed Aggregation System|netresearch|Netvibes|NewsGator[^/]{0,30}|^NING|Nutch[^/]{0,30}|Nymesis|ObjectsSearch|OgScrper|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PHPCrawl|PlantyNet_WebRobot|Pompos|Qwantify|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|SemrushBot|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slackbot-LinkExpanding|Slack-ImgProxy|Slurp|snappy|Speedy Spider|Squrl 
Java|Stringer|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|Tiny Tiny RSS|Twitterbot|WhatsApp|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]{1,30}|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s|) Link Sleuth|Xerka [A-z]{1,30}Bot|yacy(?:bot|)|YahooSeeker|Yahoo! Slurp|Yandex\w{1,30}|YodaoBot(?:-[A-z]{1,30}|)|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg|ArcGIS Hub Indexer|Google-InspectionTool)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+)|)|)|)'
- regex: '(CSimpleSpider|Cityreview Robot|CrawlDaddy|CrawlFire|Finderbots|Index crawler|Job Roboter|KiwiStatus Spider|Lijit Crawler|QuerySeekerSpider|ScollSpider|Trends Crawler|USyd-NLP-Spider|SiteCat Webbot|BotName\/\$BotVersion|123metaspider-Bot|1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]{1,30}-Agent|AdsBot-Google(?:-[a-z]{1,30}|)|altavista|AppEngine-Google|archive.{0,30}\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|BingPreview|blitzbot|BlogBridge|Bloglovin|BoardReader Blog Indexer|BoardReader Favicon Fetcher|boitho.com-dc|BotSeer|BUbiNG|\b\w{0,30}favicon\w{0,30}\b|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher|)|Feed Seeker Bot|Feedbin|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|)|GoogleOther|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile|)|IconSurf|IlTrovatore(?:-Setaccio|)|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]{1,30}Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masscan|masidani_bot|Mediapartners-Google|Microsoft .{0,30} Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media {0,2}|)|msrbot|Mtps Feed Aggregation System|netresearch|Netvibes|NewsGator[^/]{0,30}|^NING|Nutch[^/]{0,30}|Nymesis|ObjectsSearch|OgScrper|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PHPCrawl|PlantyNet_WebRobot|Pompos|Qwantify|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|SemrushBot|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slackbot-LinkExpanding|Slack-ImgProxy|Slurp|snappy|Speedy Spider|Squrl 
Java|Stringer|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|Tiny Tiny RSS|Twitterbot|WhatsApp|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]{1,30}|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s|) Link Sleuth|Xerka [A-z]{1,30}Bot|yacy(?:bot|)|YahooSeeker|Yahoo! Slurp|Yandex\w{1,30}|YodaoBot(?:-[A-z]{1,30}|)|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg|ArcGIS Hub Indexer|GPTBot|Google-InspectionTool)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+)|)|)|)'

# AWS S3 Clients
# must come before "Bots General matcher" to catch "boto"/"boto3" before "bot"
- regex: '\b(Boto3?|JetS3t|aws-(?:cli|sdk-(?:cpp|go|java|nodejs|ruby2?|dotnet-(?:\d{1,2}|core)))|s3fs)/(\d+)\.(\d+)(?:\.(\d+)|)'
- regex: '\b(Boto3?|JetS3t|aws-(?:cli|sdk-(?:cpp|go|go-v\d|java|nodejs|ruby2?|dotnet-(?:\d{1,2}|core)))|s3fs)/(\d+)\.(\d+)(?:\.(\d+)|)'

# SAFE FME
- regex: '(FME)\/(\d+\.\d+)\.(\d+)\.(\d+)'
Expand All @@ -179,6 +179,9 @@ user_agent_parsers:
- regex: '\[FB.{0,300};'
family_replacement: 'Facebook'

# RecipeRadar crawler
# Ex: Mozilla/5.0 (compatible; Linux x86_64; python-requests/2.32.3; RecipeRadar/0.1; +https://www.reciperadar.com)
# Captures the family token, then major.minor and an optional patch component.
- regex: '(RecipeRadar)/(\d+)\.(\d+)(?:\.(\d+)|)'

# Bots General matcher 'name/0.0'
- regex: '^.{0,200}?(?:\/[A-Za-z0-9\.]{0,50}|) {0,2}([A-Za-z0-9 \-_\!\[\]:]{0,50}(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]{0,50}))[/ ](\d+)(?:\.(\d+)(?:\.(\d+)|)|)'
# Bots containing bot (but not CUBOT)
Expand Down Expand Up @@ -215,6 +218,13 @@ user_agent_parsers:
# Twitter
- regex: '(Twitter for (?:iPhone|iPad)|TwitterAndroid)(?:\/(\d+)\.(\d+)|)'
family_replacement: 'Twitter'
# TikTok
# Three different user-agent tokens are all reported under the 'TikTok' family.
# Form 1: "musical_ly app_version/<major>.<minor>.<patch>"
- regex: '(musical_ly) app_version\/(\d+)\.(\d+)\.(\d+)'
family_replacement: 'TikTok'
# Form 2: "musical_ly_<major>.<minor>.<patch>"
- regex: '(musical_ly_)(\d+)\.(\d+)\.(\d+)'
family_replacement: 'TikTok'
# Form 3: "BytedanceWebview/<lowercase alphanumeric id>"; this rule captures no version groups.
- regex: '(BytedanceWebview)\/[a-z0-9]+'
family_replacement: 'TikTok'

# Phantom app
- regex: 'Mozilla.{1,200}Mobile.{1,100}(Phantom\/ios|Phantom\/android).(\d+)\.(\d+)\.(\d+)'
Expand Down Expand Up @@ -487,11 +497,17 @@ user_agent_parsers:
family_replacement: 'Tenta Browser'

# Ecosia on iOS / Android
- regex: '(Ecosia) ios@(\d+)\.(\d+)\.(\d+)\.(\d+)'
- regex: '(Ecosia) ios@(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)'
family_replacement: 'Ecosia iOS'
- regex: '(Ecosia) android@(\d+)\.(\d+)\.(\d+)\.(\d+)'
- regex: '(Ecosia) android@(\d+)(?:\.(\d+)|)(?:\.(\d+)|)(?:\.(\d+)|)'
family_replacement: 'Ecosia Android'

# VivoBrowser
- regex: '(VivoBrowser)\/(\d+)\.(\d+)\.(\d+)\.(\d+)'

# HiBrowser
- regex: '(HiBrowser)\/v(\d+)\.(\d+)\.(\d+)\.(\d+)'

# Chrome Mobile
- regex: 'Version/.{1,300}(Chrome)/(\d+)\.(\d+)\.(\d+)\.(\d+)'
family_replacement: 'Chrome Mobile WebView'
Expand Down Expand Up @@ -1145,6 +1161,29 @@ os_parsers:
# Special case for new ArcGIS Mobile products
- regex: 'ArcGISRuntime-(?:Android|iOS)\/\d+\.\d+(?:\.\d+|) \((Android|iOS) (\d+)(?:\.(\d+)(?:\.(\d+)|)|);'

##########
# Chromecast
##########
# Ex: Mozilla/5.0 (Linux; Android 12.0; Build/STTL.240206.002) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.0 Safari/537.36 CrKey/1.56.500000 DeviceType/AndroidTV
# These are the newer Android-based "Google TV" Chromecast devices.
# Google stopped updating the Chromecast firmware version in these, so they always say CrKey/1.56.500000. Therefore we extract the more useful Android version instead.
# The minor version is optional: a bare major such as "Android 12;" must still match here,
# otherwise the user agent (which also contains "Linux") is picked up by the generic Linux CrKey rule further down.
- regex: '(Android) (\d+)(?:\.(\d+)|).*CrKey'
os_replacement: 'Chromecast Android'

# Ex: Mozilla/5.0 (Fuchsia) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 CrKey/1.56.500000
# These are some intermediate "Nest Hub" Chromecast devices running Fuchsia.
# Captures the CrKey token and, when a "/<version>" suffix is present, major.minor and an optional patch.
- regex: 'Fuchsia.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)'
os_replacement: 'Chromecast Fuchsia'

# Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/SmartSpeaker
# Same pattern as the generic Linux rule below plus a trailing DeviceType/SmartSpeaker,
# so it has to stay ahead of that rule to take effect.
- regex: 'Linux.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|).*DeviceType/SmartSpeaker'
os_replacement: 'Chromecast SmartSpeaker'

# Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/Chromecast
# These are the oldest Chromecast devices that ran Linux.
# Matches any user agent with "Linux" followed by CrKey, regardless of DeviceType.
- regex: 'Linux.*(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)'
os_replacement: 'Chromecast Linux'

##########
# Android
# can actually detect rooted android os. do we care?
Expand Down Expand Up @@ -1308,6 +1347,10 @@ os_parsers:
##########
- regex: '(Tizen)[/ ](\d+)\.(\d+)'

# Chrome and Edge on iOS in desktop mode report "Intel Mac OS X" in the user agent, so this rule must come before any Mac OS check
- regex: 'Intel Mac OS X.+(CriOS|EdgiOS)/\d+'
os_replacement: 'iOS'

##########
# Mac OS
# @ref: http://en.wikipedia.org/wiki/Mac_OS_X#Versions
Expand Down Expand Up @@ -1360,6 +1403,13 @@ os_parsers:
# Box Drive and Box Sync on Mac OS X use OSX version numbers, not Darwin
- regex: '^Box.{0,200};(Darwin)/(10)\.(1\d)(?:\.(\d+)|)'
os_replacement: 'Mac OS X'

##########
# HashiCorp API
# APN/1.0 HashiCorp/1.0 Terraform/1.8.0 (+https://www.terraform.io) terraform-provider-aws/4.67.0 (+https://registry.terraform.io/providers/hashicorp/aws) aws-sdk-go/1.44.261 (go1.19.8; darwin; arm64)
##########
# The pattern has no capture groups, so no version components can be extracted from it.
# NOTE(review): the literal is not tied to the HashiCorp or aws-sdk-go tokens; any user agent
# containing "darwin; arm64" will match this rule - confirm that breadth is intended.
- regex: 'darwin; arm64'
os_replacement: 'Mac OS X'

##########
# iOS
Expand Down Expand Up @@ -1645,29 +1695,27 @@ os_parsers:
- regex: 'CFNetwork/.{0,100} Darwin/(21)\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '15'
- regex: 'CFNetwork/.{0,100} Darwin/22\.0\.\d+'
- regex: 'CFNetwork/.{0,100} Darwin/22\.([0-5])\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v2_replacement: '0'
- regex: 'CFNetwork/.{0,100} Darwin/22\.1\.\d+'
os_v2_replacement: '$1'
- regex: 'CFNetwork/.{0,100} Darwin/(22)\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v2_replacement: '1'
- regex: 'CFNetwork/.{0,100} Darwin/22\.2\.\d+'
- regex: 'CFNetwork/.{0,100} Darwin/23\.([0-5])\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v2_replacement: '2'
- regex: 'CFNetwork/.{0,100} Darwin/22\.3\.\d+'
os_v1_replacement: '17'
os_v2_replacement: '$1'
- regex: 'CFNetwork/.{0,100} Darwin/(23)\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v2_replacement: '3'
- regex: 'CFNetwork/.{0,100} Darwin/22\.4\.\d+'
os_v1_replacement: '17'
- regex: 'CFNetwork/.{0,100} Darwin/24\.([0-5])\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v2_replacement: '4'
- regex: 'CFNetwork/.{0,100} Darwin/(22)\.\d+'
os_v1_replacement: '18'
os_v2_replacement: '$1'
- regex: 'CFNetwork/.{0,100} Darwin/(24)\.\d+'
os_replacement: 'iOS'
os_v1_replacement: '16'
os_v1_replacement: '18'
- regex: 'CFNetwork/.{0,100} Darwin/'
os_replacement: 'iOS'

Expand Down Expand Up @@ -1822,12 +1870,6 @@ os_parsers:

# WebTV: family token followed by "/<major>.<minor>", e.g. "WebTV/2.6".
# The separator is escaped so only a literal dot is accepted between the two numbers.
- regex: '(WebTV)/(\d+)\.(\d+)'

##########
# Chromecast
##########
- regex: '(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)'
os_replacement: 'Chromecast'

##########
# Misc mobile
##########
Expand Down Expand Up @@ -1868,6 +1910,21 @@ os_parsers:
# Roku Digital-Video-Players https://www.roku.com/
- regex: '^(Roku)/DVP-(\d+)\.(\d+)'

##########
# Amazon S3 client boto3
# HashiCorp API
# Boto3/1.28.62 md/Botocore#1.31.62 ua/2.0 os/macos#22.4.0 md/arch#arm64 lang/python#3.11.6 md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.31.62
# APN/1.0 HashiCorp/1.0 Terraform/1.8.1 (+https://www.terraform.io) terraform-provider-aws/4.67.0 (+https://registry.terraform.io/providers/hashicorp/aws) aws-sdk-go-v2/1.18.0 os/macos lang/go/1.19.8 md/GOOS/darwin md/GOARCH/arm64 api/identitystore/1.16.11
##########
# The "#" and every version component are optional, so a bare "os/macos" (second example) matches with empty captures.
# The pattern has no family capture group; the three version fields are mapped explicitly to groups 1-3.
# NOTE(review): "22.4.0" in the first example looks like a Darwin kernel version rather than a macOS release
# number - confirm that reporting it as the Mac OS X version is intended.
- regex: 'os\/macos[#]?(\d*)[.]?(\d*)[.]?(\d*)'
os_replacement: 'Mac OS X'
os_v1_replacement: '$1'
os_v2_replacement: '$2'
os_v3_replacement: '$3'

# Huawei HarmonyOS
# Matches "HarmonyOS" followed by one or more whitespace/semicolon characters.
# Each of the three version components and the dots between them are optional, so a token without any version still matches.
- regex: '(HarmonyOS)[\s;]+(\d+|)\.?(\d+|)\.?(\d+|)'

device_parsers:

#########
Expand Down Expand Up @@ -2461,6 +2518,39 @@ device_parsers:
brand_replacement: 'ChangJia'
model_replacement: '$1'

##########
# Chromecast
# @ref: https://en.wikipedia.org/wiki/Chromecast#Hardware_and_design
##########
# Ex: Mozilla/5.0 (Linux; Android 12.0; Build/STTL.240206.002) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.0 Safari/537.36 CrKey/1.56.500000 DeviceType/AndroidTV
# Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/SmartSpeaker
# Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/Chromecast
# These are the newer Chromecast devices, such as smart speakers, Google TVs, etc. that have an explicit device type.
# The model is whatever follows "DeviceType/" up to the next "/" or the end of the string.
- regex: 'CrKey.*DeviceType/([^/]*)'
brand_replacement: 'Google'
device_replacement: 'Chromecast'
model_replacement: '$1'

# Ex: Mozilla/5.0 (Fuchsia) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 CrKey/1.56.500000
# These are some intermediate "Nest Hub" Chromecast devices running Fuchsia.
- regex: 'Fuchsia.*CrKey'
brand_replacement: 'Google'
device_replacement: 'Chromecast'
model_replacement: 'Nest Hub'

# Ex: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.47 Safari/537.36 CrKey/1.36.159268
# These are the first generation of Chromecast devices that ran Linux. They don't specify a device type.
# The dot in the firmware version is escaped so that only a literal "1.36" prefix is accepted.
- regex: 'Linux.*CrKey/1\.36'
brand_replacement: 'Google'
device_replacement: 'Chromecast'
model_replacement: 'First Generation'

# We have no data on the user agent strings of other models, except that they all report CrKey/
- regex: 'CrKey/'
brand_replacement: 'Google'
device_replacement: 'Chromecast'
model_replacement: 'Chromecast'

#########
# Cloudfone
# @ref: http://www.cloudfonemobile.com/
Expand Down Expand Up @@ -5851,7 +5941,7 @@ device_parsers:
##########
# Spiders (this is a hack...)
##########
- regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper|Google-InspectionTool)'
- regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper|RecipeRadar|GPTBot|Google-InspectionTool)'
regex_flag: 'i'
device_replacement: 'Spider'
brand_replacement: 'Spider'
Expand Down
36 changes: 34 additions & 2 deletions tests/test_device.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7735,6 +7735,36 @@ test_cases:
brand: 'ChangJia'
model: 'TPC97113'

- user_agent_string: 'Mozilla/5.0 (Linux; Android 12.0; Build/STTL.240206.002) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.0 Safari/537.36 CrKey/1.56.500000 DeviceType/AndroidTV'
family: 'Chromecast'
brand: 'Google'
model: 'AndroidTV'

- user_agent_string: 'Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/SmartSpeaker'
family: 'Chromecast'
brand: 'Google'
model: 'SmartSpeaker'

- user_agent_string: 'Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.225 Safari/537.36 CrKey/1.56.500000 DeviceType/Chromecast'
family: 'Chromecast'
brand: 'Google'
model: 'Chromecast'

- user_agent_string: 'Mozilla/5.0 (Fuchsia) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 CrKey/1.56.500000'
family: 'Chromecast'
brand: 'Google'
model: 'Nest Hub'

- user_agent_string: 'Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.47 Safari/537.36 CrKey/1.36.159268'
family: 'Chromecast'
brand: 'Google'
model: 'First Generation'

- user_agent_string: 'Mozilla/5.0 (X11; Linux armv7l) CrKey/1'
family: 'Chromecast'
brand: 'Google'
model: 'Chromecast'

- user_agent_string: 'Mozilla/5.0 (Linux; U; Android 2.3.5; En-gb; Cloudfone_Excite320e Build/GRJ90) AppleWebKit/533.1 (KHTML, Like Gecko) Version/4.0 Mobile Safari/533.1'
family: 'Cloudfone Excite 320e'
brand: 'Cloudfone'
Expand Down Expand Up @@ -80599,5 +80629,7 @@ test_cases:
brand: 'Motorola'
model: 'motorola moto g play (2021)'



- user_agent_string: 'Mozilla/5.0 (compatible; Linux x86_64; python-requests/2.32.3; RecipeRadar/0.1; +https://www.reciperadar.com)'
family: 'Spider'
brand: 'Spider'
model: 'Desktop'
Loading

0 comments on commit a3cc442

Please sign in to comment.