From e067497e07adc11343eb93354ff079da9c6ff9ef Mon Sep 17 00:00:00 2001 From: Mook Date: Mon, 29 Apr 2024 19:56:17 -0700 Subject: [PATCH 1/2] package.json: Add tldts This will be used to handle eTLD parsing. --- package-lock.json | 32 +++++++++++++++++++++++++++++++- package.json | 3 ++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index 4f86780..bbfe28b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,8 @@ "version": "3.9.0", "license": "MIT", "dependencies": { - "punycode": "^2.3.0" + "punycode": "^2.3.0", + "tldts": "^6.1.18" }, "devDependencies": { "@babel/core": "^7.21.3", @@ -14896,6 +14897,22 @@ "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==", "dev": true }, + "node_modules/tldts": { + "version": "6.1.18", + "resolved": "https://registry.npmjs.org/tldts/-/tldts-6.1.18.tgz", + "integrity": "sha512-F+6zjPFnFxZ0h6uGb8neQWwHQm8u3orZVFribsGq4eBgEVrzSkHxzWS2l6aKr19T1vXiOMFjqfff4fQt+WgJFg==", + "dependencies": { + "tldts-core": "^6.1.18" + }, + "bin": { + "tldts": "bin/cli.js" + } + }, + "node_modules/tldts-core": { + "version": "6.1.18", + "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-6.1.18.tgz", + "integrity": "sha512-e4wx32F/7dMBSZyKAx825Yte3U0PQtZZ0bkWxYQiwLteRVnQ5zM40fEbi0IyNtwQssgJAk3GCr7Q+w39hX0VKA==" + }, "node_modules/tmp": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.1.tgz", @@ -27147,6 +27164,19 @@ "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==", "dev": true }, + "tldts": { + "version": "6.1.18", + "resolved": "https://registry.npmjs.org/tldts/-/tldts-6.1.18.tgz", + "integrity": "sha512-F+6zjPFnFxZ0h6uGb8neQWwHQm8u3orZVFribsGq4eBgEVrzSkHxzWS2l6aKr19T1vXiOMFjqfff4fQt+WgJFg==", + "requires": { + "tldts-core": "^6.1.18" + } + }, + "tldts-core": { + "version": "6.1.18", + "resolved": 
"https://registry.npmjs.org/tldts-core/-/tldts-core-6.1.18.tgz", + "integrity": "sha512-e4wx32F/7dMBSZyKAx825Yte3U0PQtZZ0bkWxYQiwLteRVnQ5zM40fEbi0IyNtwQssgJAk3GCr7Q+w39hX0VKA==" + }, "tmp": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.1.tgz", diff --git a/package.json b/package.json index 655c8d1..fa32ec8 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "webpack-merge": "^5.8.0" }, "dependencies": { - "punycode": "^2.3.0" + "punycode": "^2.3.0", + "tldts": "^6.1.18" } } From d3a85526982d040777bcd893321215c9639f6838 Mon Sep 17 00:00:00 2001 From: Mook Date: Mon, 29 Apr 2024 20:01:10 -0700 Subject: [PATCH 2/2] Better handling of eTLDs This fixes eTLD handling for parsing domains (for use with default container rule addition templating) to support IANA TLDs, and while we're at it, effective TLDs from Mozilla's public suffix list. This may result in some hosts ending up with different containers, so this may constitute data loss. --- src/ExtendedURL/__tests__/ExtendURL.spec.js | 28 +++++++++++++++++++++ src/ExtendedURL/index.js | 16 ++++++++---- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/ExtendedURL/__tests__/ExtendURL.spec.js b/src/ExtendedURL/__tests__/ExtendURL.spec.js index 044bfc9..c0c53b0 100644 --- a/src/ExtendedURL/__tests__/ExtendURL.spec.js +++ b/src/ExtendedURL/__tests__/ExtendURL.spec.js @@ -8,5 +8,33 @@ describe('utils', () => { expect(eUrl.tld).toEqual('com'); }); + it('should ignore IP addresses', function() { + const eUrl = new ExtendedURL('https://192.0.2.123'); + expect(eUrl.domain).toEqual('192.0.2.123'); + expect(eUrl.tld).toEqual('192.0.2.123'); + }); + + it('should understand second-level TLDs', function() { + const eUrl = new ExtendedURL('https://amazon.co.uk'); + expect(eUrl.domain).toEqual('amazon'); + expect(eUrl.tld).toEqual('co.uk'); + }); + + it('should understand public suffix TLDs', function() { + const eUrl = new ExtendedURL('https://kintesh.github.io'); + 
expect(eUrl.domain).toEqual('kintesh'); + expect(eUrl.tld).toEqual('github.io'); + }); + + it('should ignore localhost', function() { + const eUrl = new ExtendedURL('https://localhost'); + expect(eUrl.domain).toEqual('localhost'); + expect(eUrl.tld).toEqual('localhost'); + }); + it('accepts unknown domains', function() { + const eUrl = new ExtendedURL('https://some.fake_host.test_test'); + expect(eUrl.domain).toEqual('fake_host'); + expect(eUrl.tld).toEqual('test_test'); + }); }); diff --git a/src/ExtendedURL/index.js b/src/ExtendedURL/index.js index fab9f84..a05e51c 100644 --- a/src/ExtendedURL/index.js +++ b/src/ExtendedURL/index.js @@ -1,12 +1,18 @@ +import { parse } from 'tldts'; + export default class ExtendedURL extends URL { constructor(url) { super(url); - const split = this.hostname.split('.'); - this.tld = split[split.length - 1]; - if (split.length > 1) { - this.domain = split[split.length - 2]; + const parseResult = parse(this.hostname, { + allowPrivateDomains: true, + extractHostname: false, + }); + + if (parseResult.isIp) { + this.domain = this.tld = parseResult.hostname; } else { - this.domain = this.tld; + this.tld = parseResult.publicSuffix; + this.domain = parseResult.domainWithoutSuffix || this.hostname; } } }