diff --git a/README.md b/README.md index 055b1a9..3ed9149 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,7 @@ $api = new \TikScraper\Api([ 'verify_fp' => 'verify_...', // Cookie used for skipping captcha requests 'device_id' => '596845...' // Custom device id 'user_agent' => 'YOUR_CUSTOM_USER_AGENT_HERE', - 'proxy' => 'http://user:password@hostname:port', - 'cookie_path' => '/your/custom/path/here/tiktok.json' // Path to store Guzzle's cookies, defaults to /tmp/tiktok.json + 'proxy' => 'http://user:password@hostname:port' ], $cacheEngine); $tag = $api->hashtag('funny'); @@ -46,6 +45,7 @@ TikScrapperPHP supports caching requests, to use it you need to implement [ICach HUGE thanks to the following projects, this wouldn't be possible without their help +* [puppeteer-extra-plugin-stealth](https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth), ported library to PHP * [TikTok-API-PHP](https://github.com/ssovit/TikTok-API-PHP) * [TikTok-Api](https://github.com/davidteather/TikTok-Api) * [tiktok-signature](https://github.com/carcabot/tiktok-signature) diff --git a/VERSION b/VERSION index 0a9b01e..cfad412 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.6.0.5 +2.6.1.0 diff --git a/composer.json b/composer.json index 55b28a0..5956171 100644 --- a/composer.json +++ b/composer.json @@ -26,7 +26,6 @@ "require": { "php": "^8.1", "php-webdriver/webdriver": "^1.12", - "sapistudio/seleniumstealth": "^1.0", "guzzlehttp/guzzle": "^7.8" }, "require-dev": { diff --git a/composer.lock b/composer.lock index 0406935..cbdda49 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "6bbea8724bc0fbfea840ea9a02a646c4", + "content-hash": "b8c307dfc18dd21b6e0dd93d7a5ad92e", "packages": [ { "name": "guzzlehttp/guzzle", @@ -601,36 +601,6 @@ }, "time": "2019-03-08T08:55:37+00:00" }, - { - 
"name": "sapistudio/seleniumstealth", - "version": "1.0.3", - "source": { - "type": "git", - "url": "https://github.com/Sapistudio/SeleniumStealth.git", - "reference": "4b392077e3d609fed564b78ae2e8becb98f01957" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/Sapistudio/SeleniumStealth/zipball/4b392077e3d609fed564b78ae2e8becb98f01957", - "reference": "4b392077e3d609fed564b78ae2e8becb98f01957", - "shasum": "" - }, - "type": "library", - "autoload": { - "psr-4": { - "SapiStudio\\SeleniumStealth\\": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "support": { - "issues": "https://github.com/Sapistudio/SeleniumStealth/issues", - "source": "https://github.com/Sapistudio/SeleniumStealth/tree/1.0.3" - }, - "time": "2022-01-10T20:04:41+00:00" - }, { "name": "symfony/deprecation-contracts", "version": "v3.5.0", diff --git a/js/stealth/README.md b/js/stealth/README.md new file mode 100644 index 0000000..af1edc4 --- /dev/null +++ b/js/stealth/README.md @@ -0,0 +1,4 @@ +# NOTE +This is a port of [puppeteer-extra-plugin-stealth](https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth) to PHP + +`navigator.webdriver` and `user-agent-override` are implemented using PHP code! diff --git a/js/stealth/chrome.app.js b/js/stealth/chrome.app.js new file mode 100644 index 0000000..9b26d73 --- /dev/null +++ b/js/stealth/chrome.app.js @@ -0,0 +1,77 @@ +// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/chrome.app/index.js +(function () { + utils.init() + if (!window.chrome) { + // Use the exact property descriptor found in headful Chrome + // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')` + Object.defineProperty(window, 'chrome', { + writable: true, + enumerable: true, + configurable: false, // note! 
+ value: {} // We'll extend that later + }) + } + + // That means we're running headful and don't need to mock anything + if ('app' in window.chrome) { + return // Nothing to do here + } + + const makeError = { + ErrorInInvocation: fn => { + const err = new TypeError(`Error in invocation of app.${fn}()`) + return utils.stripErrorWithAnchor( + err, + `at ${fn} (eval at ` + ) + } + } + + // There's some static data in that property which doesn't seem to change, + // we should periodically check for updates: `JSON.stringify(window.app, null, 2)` + const STATIC_DATA = JSON.parse( + ` + { + "isInstalled": false, + "InstallState": { + "DISABLED": "disabled", + "INSTALLED": "installed", + "NOT_INSTALLED": "not_installed" + }, + "RunningState": { + "CANNOT_RUN": "cannot_run", + "READY_TO_RUN": "ready_to_run", + "RUNNING": "running" + } + } + `.trim() + ) + + window.chrome.app = { + ...STATIC_DATA, + + get isInstalled() { + return false + }, + + getDetails: function getDetails() { + if (arguments.length) { + throw makeError.ErrorInInvocation(`getDetails`) + } + return null + }, + getIsInstalled: function getIsInstalled() { + if (arguments.length) { + throw makeError.ErrorInInvocation(`getIsInstalled`) + } + return false + }, + runningState: function runningState() { + if (arguments.length) { + throw makeError.ErrorInInvocation(`runningState`) + } + return 'cannot_run' + } + } + utils.patchToStringNested(window.chrome.app) +})() diff --git a/js/stealth/chrome.csi.js b/js/stealth/chrome.csi.js new file mode 100644 index 0000000..18a2a94 --- /dev/null +++ b/js/stealth/chrome.csi.js @@ -0,0 +1,37 @@ +// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/chrome.csi/index.js + +(function () { + utils.init() + if (!window.chrome) { + // Use the exact property descriptor found in headful Chrome + // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')` + Object.defineProperty(window, 'chrome', { + writable: true, + 
enumerable: true, + configurable: false, // note! + value: {} // We'll extend that later + }) + } + + // That means we're running headful and don't need to mock anything + if ('csi' in window.chrome) { + return // Nothing to do here + } + + // Check that the Navigation Timing API v1 is available, we need that + if (!window.performance || !window.performance.timing) { + return + } + + const { timing } = window.performance + + window.chrome.csi = function () { + return { + onloadT: timing.domContentLoadedEventEnd, + startE: timing.navigationStart, + pageT: Date.now() - timing.navigationStart, + tran: 15 // Transition type or something + } + } + utils.patchToString(window.chrome.csi) +})() diff --git a/js/stealth/chrome.loadtimes.js b/js/stealth/chrome.loadtimes.js new file mode 100644 index 0000000..c89d6b4 --- /dev/null +++ b/js/stealth/chrome.loadtimes.js @@ -0,0 +1,128 @@ +// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/chrome.loadTimes/index.js + +(function () { + utils.init() + if (!window.chrome) { + // Use the exact property descriptor found in headful Chrome + // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')` + Object.defineProperty(window, 'chrome', { + writable: true, + enumerable: true, + configurable: false, // note! 
+ value: {} // We'll extend that later + }) + } + + // That means we're running headful and don't need to mock anything + if ('loadTimes' in window.chrome) { + return // Nothing to do here + } + + // Check that the Navigation Timing API v1 + v2 is available, we need that + if ( + !window.performance || + !window.performance.timing || + !window.PerformancePaintTiming + ) { + return + } + + const { performance } = window + + // Some stuff is not available on about:blank as it requires a navigation to occur, + // let's harden the code to not fail then: + const ntEntryFallback = { + nextHopProtocol: 'h2', + type: 'other' + } + + // The API exposes some funky info regarding the connection + const protocolInfo = { + get connectionInfo() { + const ntEntry = + performance.getEntriesByType('navigation')[0] || ntEntryFallback + return ntEntry.nextHopProtocol + }, + get npnNegotiatedProtocol() { + // NPN is deprecated in favor of ALPN, but this implementation returns the + // HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN. + const ntEntry = + performance.getEntriesByType('navigation')[0] || ntEntryFallback + return ['h2', 'hq'].includes(ntEntry.nextHopProtocol) + ? ntEntry.nextHopProtocol + : 'unknown' + }, + get navigationType() { + const ntEntry = + performance.getEntriesByType('navigation')[0] || ntEntryFallback + return ntEntry.type + }, + get wasAlternateProtocolAvailable() { + // The Alternate-Protocol header is deprecated in favor of Alt-Svc + // (https://www.mnot.net/blog/2016/03/09/alt-svc), so technically this + // should always return false. + return false + }, + get wasFetchedViaSpdy() { + // SPDY is deprecated in favor of HTTP/2, but this implementation returns + // true for HTTP/2 or HTTP2+QUIC/39 as well. 
+ const ntEntry = + performance.getEntriesByType('navigation')[0] || ntEntryFallback + return ['h2', 'hq'].includes(ntEntry.nextHopProtocol) + }, + get wasNpnNegotiated() { + // NPN is deprecated in favor of ALPN, but this implementation returns true + // for HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN. + const ntEntry = + performance.getEntriesByType('navigation')[0] || ntEntryFallback + return ['h2', 'hq'].includes(ntEntry.nextHopProtocol) + } + } + + const { timing } = window.performance + + // Truncate number to specific number of decimals, most of the `loadTimes` stuff has 3 + function toFixed(num, fixed) { + var re = new RegExp('^-?\\d+(?:.\\d{0,' + (fixed || -1) + '})?') + return num.toString().match(re)[0] + } + + const timingInfo = { + get firstPaintAfterLoadTime() { + // This was never actually implemented and always returns 0. + return 0 + }, + get requestTime() { + return timing.navigationStart / 1000 + }, + get startLoadTime() { + return timing.navigationStart / 1000 + }, + get commitLoadTime() { + return timing.responseStart / 1000 + }, + get finishDocumentLoadTime() { + return timing.domContentLoadedEventEnd / 1000 + }, + get finishLoadTime() { + return timing.loadEventEnd / 1000 + }, + get firstPaintTime() { + const fpEntry = performance.getEntriesByType('paint')[0] || { + startTime: timing.loadEventEnd / 1000 // Fallback if no navigation occured (`about:blank`) + } + return toFixed( + (fpEntry.startTime + performance.timeOrigin) / 1000, + 3 + ) + } + } + + window.chrome.loadTimes = function () { + return { + ...protocolInfo, + ...timingInfo + } + } + utils.patchToString(window.chrome.loadTimes) +})() diff --git a/js/stealth/chrome.runtime.js b/js/stealth/chrome.runtime.js new file mode 100644 index 0000000..2d96524 --- /dev/null +++ b/js/stealth/chrome.runtime.js @@ -0,0 +1,262 @@ +// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/chrome.runtime/index.js + +(function () { + 
utils.init(); + const STATIC_DATA = { + "OnInstalledReason": { + "CHROME_UPDATE": "chrome_update", + "INSTALL": "install", + "SHARED_MODULE_UPDATE": "shared_module_update", + "UPDATE": "update" + }, + "OnRestartRequiredReason": { + "APP_UPDATE": "app_update", + "OS_UPDATE": "os_update", + "PERIODIC": "periodic" + }, + "PlatformArch": { + "ARM": "arm", + "ARM64": "arm64", + "MIPS": "mips", + "MIPS64": "mips64", + "X86_32": "x86-32", + "X86_64": "x86-64" + }, + "PlatformNaclArch": { + "ARM": "arm", + "MIPS": "mips", + "MIPS64": "mips64", + "X86_32": "x86-32", + "X86_64": "x86-64" + }, + "PlatformOs": { + "ANDROID": "android", + "CROS": "cros", + "LINUX": "linux", + "MAC": "mac", + "OPENBSD": "openbsd", + "WIN": "win" + }, + "RequestUpdateCheckStatus": { + "NO_UPDATE": "no_update", + "THROTTLED": "throttled", + "UPDATE_AVAILABLE": "update_available" + } + } + + if (!window.chrome) { + // Use the exact property descriptor found in headful Chrome + // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')` + Object.defineProperty(window, 'chrome', { + writable: true, + enumerable: true, + configurable: false, // note! 
+ value: {} // We'll extend that later + }) + } + + // That means we're running headful and don't need to mock anything + const existsAlready = 'runtime' in window.chrome + // `chrome.runtime` is only exposed on secure origins + const isNotSecure = !window.location.protocol.startsWith('https') + if (existsAlready || (isNotSecure && !(typeof opts !== 'undefined' && opts.runOnInsecureOrigins))) { + return // Nothing to do here + } + + window.chrome.runtime = { + // There's a bunch of static data in that property which doesn't seem to change, + // we should periodically check for updates: `JSON.stringify(window.chrome.runtime, null, 2)` + ...STATIC_DATA, + // `chrome.runtime.id` is extension related and returns undefined in Chrome + get id() { + return undefined + }, + // These two require more sophisticated mocks + connect: null, + sendMessage: null + } + + const makeCustomRuntimeErrors = (preamble, method, extensionId) => ({ + NoMatchingSignature: new TypeError( + preamble + `No matching signature.` + ), + MustSpecifyExtensionID: new TypeError( + preamble + + `${method} called from a webpage must specify an Extension ID (string) for its first argument.` + ), + InvalidExtensionID: new TypeError( + preamble + `Invalid extension id: '${extensionId}'` + ) + }) + + // Valid Extension IDs are 32 characters in length and use the letter `a` to `p`: + // https://source.chromium.org/chromium/chromium/src/+/master:components/crx_file/id_util.cc;drc=14a055ccb17e8c8d5d437fe080faba4c6f07beac;l=90 + const isValidExtensionID = str => + str.length === 32 && str.toLowerCase().match(/^[a-p]+$/) + + /** Mock `chrome.runtime.sendMessage` */ + const sendMessageHandler = { + apply: function (target, ctx, args) { + const [extensionId, options, responseCallback] = args || [] + + // Define custom errors + const errorPreamble = `Error in invocation of runtime.sendMessage(optional string extensionId, any message, optional object options, optional function responseCallback): ` + const Errors = makeCustomRuntimeErrors( + 
errorPreamble, + `chrome.runtime.sendMessage()`, + extensionId + ) + + // Check if the call signature looks ok + const noArguments = args.length === 0 + const tooManyArguments = args.length > 4 + const incorrectOptions = options && typeof options !== 'object' + const incorrectResponseCallback = + responseCallback && typeof responseCallback !== 'function' + if ( + noArguments || + tooManyArguments || + incorrectOptions || + incorrectResponseCallback + ) { + throw Errors.NoMatchingSignature + } + + // At least 2 arguments are required before we even validate the extension ID + if (args.length < 2) { + throw Errors.MustSpecifyExtensionID + } + + // Now let's make sure we got a string as extension ID + if (typeof extensionId !== 'string') { + throw Errors.NoMatchingSignature + } + + if (!isValidExtensionID(extensionId)) { + throw Errors.InvalidExtensionID + } + + return undefined // Normal behavior + } + } + utils.mockWithProxy( + window.chrome.runtime, + 'sendMessage', + function sendMessage() { }, + sendMessageHandler + ) + + /** + * Mock `chrome.runtime.connect` + * + * @see https://developer.chrome.com/apps/runtime#method-connect + */ + const connectHandler = { + apply: function (target, ctx, args) { + const [extensionId, connectInfo] = args || [] + + // Define custom errors + const errorPreamble = `Error in invocation of runtime.connect(optional string extensionId, optional object connectInfo): ` + const Errors = makeCustomRuntimeErrors( + errorPreamble, + `chrome.runtime.connect()`, + extensionId + ) + + // Behavior differs a bit from sendMessage: + const noArguments = args.length === 0 + const emptyStringArgument = args.length === 1 && extensionId === '' + if (noArguments || emptyStringArgument) { + throw Errors.MustSpecifyExtensionID + } + + const tooManyArguments = args.length > 2 + const incorrectConnectInfoType = + connectInfo && typeof connectInfo !== 'object' + + if (tooManyArguments || incorrectConnectInfoType) { + throw Errors.NoMatchingSignature + } + + 
const extensionIdIsString = typeof extensionId === 'string' + if (extensionIdIsString && extensionId === '') { + throw Errors.MustSpecifyExtensionID + } + if (extensionIdIsString && !isValidExtensionID(extensionId)) { + throw Errors.InvalidExtensionID + } + + // There's another edge-case here: extensionId is optional so we might find a connectInfo object as first param, which we need to validate + const validateConnectInfo = ci => { + // More than a first param connectInfo as been provided + if (args.length > 1) { + throw Errors.NoMatchingSignature + } + // An empty connectInfo has been provided + if (Object.keys(ci).length === 0) { + throw Errors.MustSpecifyExtensionID + } + // Loop over all connectInfo props an check them + Object.entries(ci).forEach(([k, v]) => { + const isExpected = ['name', 'includeTlsChannelId'].includes(k) + if (!isExpected) { + throw new TypeError( + errorPreamble + `Unexpected property: '${k}'.` + ) + } + const MismatchError = (propName, expected, found) => + TypeError( + errorPreamble + + `Error at property '${propName}': Invalid type: expected ${expected}, found ${found}.` + ) + if (k === 'name' && typeof v !== 'string') { + throw MismatchError(k, 'string', typeof v) + } + if (k === 'includeTlsChannelId' && typeof v !== 'boolean') { + throw MismatchError(k, 'boolean', typeof v) + } + }) + } + if (typeof extensionId === 'object') { + validateConnectInfo(extensionId) + throw Errors.MustSpecifyExtensionID + } + + // Unfortunately even when the connect fails Chrome will return an object with methods we need to mock as well + return utils.patchToStringNested(makeConnectResponse()) + } + } + utils.mockWithProxy( + window.chrome.runtime, + 'connect', + function connect() { }, + connectHandler + ) + + function makeConnectResponse() { + const onSomething = () => ({ + addListener: function addListener() { }, + dispatch: function dispatch() { }, + hasListener: function hasListener() { }, + hasListeners: function hasListeners() { + return false + }, 
+ removeListener: function removeListener() { } + }) + + const response = { + name: '', + sender: undefined, + disconnect: function disconnect() { }, + onDisconnect: onSomething(), + onMessage: onSomething(), + postMessage: function postMessage() { + if (!arguments.length) { + throw new TypeError(`Insufficient number of arguments.`) + } + throw new Error(`Attempting to use a disconnected port object`) + } + } + return response + } +})() diff --git a/js/stealth/iframe.contentWindow.js b/js/stealth/iframe.contentWindow.js new file mode 100644 index 0000000..030b79b --- /dev/null +++ b/js/stealth/iframe.contentWindow.js @@ -0,0 +1,106 @@ +// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/iframe.contentWindow/index.js + +(function () { + utils.init(); + try { + // Adds a contentWindow proxy to the provided iframe element + const addContentWindowProxy = iframe => { + const contentWindowProxy = { + get(target, key) { + // Now to the interesting part: + // We actually make this thing behave like a regular iframe window, + // by intercepting calls to e.g. `.self` and redirect it to the correct thing. :) + // That makes it possible for these assertions to be correct: + // iframe.contentWindow.self === window.top // must be false + if (key === 'self') { + return this + } + // iframe.contentWindow.frameElement === iframe // must be true + if (key === 'frameElement') { + return iframe + } + // Intercept iframe.contentWindow[0] to hide the property 0 added by the proxy. 
+ if (key === '0') { + return undefined + } + return Reflect.get(target, key) + } + } + + if (!iframe.contentWindow) { + const proxy = new Proxy(window, contentWindowProxy) + Object.defineProperty(iframe, 'contentWindow', { + get() { + return proxy + }, + set(newValue) { + return newValue // contentWindow is immutable + }, + enumerable: true, + configurable: false + }) + } + } + + // Handles iframe element creation, augments `srcdoc` property so we can intercept further + const handleIframeCreation = (target, thisArg, args) => { + const iframe = target.apply(thisArg, args) + + // We need to keep the originals around + const _iframe = iframe + const _srcdoc = _iframe.srcdoc + + // Add hook for the srcdoc property + // We need to be very surgical here to not break other iframes by accident + Object.defineProperty(iframe, 'srcdoc', { + configurable: true, // Important, so we can reset this later + get: function () { + return _srcdoc + }, + set: function (newValue) { + addContentWindowProxy(this) + // Reset property, the hook is only needed once + Object.defineProperty(iframe, 'srcdoc', { + configurable: false, + writable: false, + value: _srcdoc + }) + _iframe.srcdoc = newValue + } + }) + return iframe + } + + // Adds a hook to intercept iframe creation events + const addIframeCreationSniffer = () => { + /* global document */ + const createElementHandler = { + // Make toString() native + get(target, key) { + return Reflect.get(target, key) + }, + apply: function (target, thisArg, args) { + const isIframe = + args && args.length && `${args[0]}`.toLowerCase() === 'iframe' + if (!isIframe) { + // Everything as usual + return target.apply(thisArg, args) + } else { + return handleIframeCreation(target, thisArg, args) + } + } + } + // All this just due to iframes with srcdoc bug + utils.replaceWithProxy( + document, + 'createElement', + createElementHandler + ) + } + + // Let's go + addIframeCreationSniffer() + } catch (err) { + // console.warn(err) + } +})() diff --git 
a/js/stealth/media.codecs.js b/js/stealth/media.codecs.js new file mode 100644 index 0000000..37503de --- /dev/null +++ b/js/stealth/media.codecs.js @@ -0,0 +1,58 @@ +// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/media.codecs/index.js + +(function () { + utils.init(); + const parseInput = arg => { + const [mime, codecStr] = arg.trim().split(';') + let codecs = [] + if (codecStr && codecStr.includes('codecs="')) { + codecs = codecStr + .trim() + .replace(`codecs="`, '') + .replace(`"`, '') + .trim() + .split(',') + .filter(x => !!x) + .map(x => x.trim()) + } + return { + mime, + codecStr, + codecs + } + } + + const canPlayType = { + // Intercept certain requests + apply: function (target, ctx, args) { + if (!args || !args.length) { + return target.apply(ctx, args) + } + const { mime, codecs } = parseInput(args[0]) + // This specific mp4 codec is missing in Chromium + if (mime === 'video/mp4') { + if (codecs.includes('avc1.42E01E')) { + return 'probably' + } + } + // This mimetype is only supported if no codecs are specified + if (mime === 'audio/x-m4a' && !codecs.length) { + return 'maybe' + } + + // This mimetype is only supported if no codecs are specified + if (mime === 'audio/aac' && !codecs.length) { + return 'probably' + } + // Everything else as usual + return target.apply(ctx, args) + } + } + + /* global HTMLMediaElement */ + utils.replaceWithProxy( + HTMLMediaElement.prototype, + 'canPlayType', + canPlayType + ) +})() diff --git a/js/stealth/navigator.hardwareConcurrency.js b/js/stealth/navigator.hardwareConcurrency.js new file mode 100644 index 0000000..30ff117 --- /dev/null +++ b/js/stealth/navigator.hardwareConcurrency.js @@ -0,0 +1,10 @@ +(function () { + utils.init(); + utils.replaceGetterWithProxy( + Object.getPrototypeOf(navigator), + 'hardwareConcurrency', + utils.makeHandler().getterValue( + 4 + ) + ) +})() diff --git a/js/stealth/navigator.languages.js 
b/js/stealth/navigator.languages.js new file mode 100644 index 0000000..eef9ba6 --- /dev/null +++ b/js/stealth/navigator.languages.js @@ -0,0 +1,9 @@ +(function () { + utils.init(); + const languages = ['en-US', 'en'] + utils.replaceGetterWithProxy( + Object.getPrototypeOf(navigator), + 'languages', + utils.makeHandler().getterValue(Object.freeze([...languages])) + ) +})() diff --git a/js/stealth/navigator.permissions.js b/js/stealth/navigator.permissions.js new file mode 100644 index 0000000..779a520 --- /dev/null +++ b/js/stealth/navigator.permissions.js @@ -0,0 +1,44 @@ +// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/navigator.permissions/index.js + +(function () { + utils.init(); + const isSecure = document.location.protocol.startsWith('https') + + // In headful on secure origins the permission should be "default", not "denied" + if (isSecure) { + utils.replaceGetterWithProxy(Notification, 'permission', { + apply() { + return 'default' + } + }) + } + + // Another weird behavior: + // On insecure origins in headful the state is "denied", + // whereas in headless it's "prompt" + if (!isSecure) { + const handler = { + apply(target, ctx, args) { + const param = (args || [])[0] + + const isNotifications = + param && param.name && param.name === 'notifications' + if (!isNotifications) { + return utils.cache.Reflect.apply(...arguments) + } + + return Promise.resolve( + Object.setPrototypeOf( + { + state: 'denied', + onchange: null + }, + PermissionStatus.prototype + ) + ) + } + } + // Note: Don't use `Object.getPrototypeOf` here + utils.replaceWithProxy(Permissions.prototype, 'query', handler) + } +})() diff --git a/js/stealth/navigator.plugins.js b/js/stealth/navigator.plugins.js new file mode 100644 index 0000000..b371243 --- /dev/null +++ b/js/stealth/navigator.plugins.js @@ -0,0 +1,294 @@ +// 
https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/index.js + +(function () { + utils.init(); + let fns = { + generateMimeTypeArray: (fns) => mimeTypesData => { + return fns.generateMagicArray(fns)( + mimeTypesData, + MimeTypeArray.prototype, + MimeType.prototype, + 'type' + ) + }, + generatePluginArray: (fns) => pluginsData => { + return fns.generateMagicArray(fns)( + pluginsData, + PluginArray.prototype, + Plugin.prototype, + 'name' + ) + }, + generateMagicArray: (fns) => + function ( + dataArray = [], + proto = MimeTypeArray.prototype, + itemProto = MimeType.prototype, + itemMainProp = 'type' + ) { + // Quick helper to set props with the same descriptors vanilla is using + const defineProp = (obj, prop, value) => + Object.defineProperty(obj, prop, { + value, + writable: false, + enumerable: false, // Important for mimeTypes & plugins: `JSON.stringify(navigator.mimeTypes)` + configurable: true + }) + + // Loop over our fake data and construct items + const makeItem = data => { + const item = {} + for (const prop of Object.keys(data)) { + if (prop.startsWith('__')) { + continue + } + defineProp(item, prop, data[prop]) + } + return patchItem(item, data) + } + + const patchItem = (item, data) => { + let descriptor = Object.getOwnPropertyDescriptors(item) + + // Special case: Plugins have a magic length property which is not enumerable + // e.g. 
`navigator.plugins[i].length` should always be the length of the assigned mimeTypes + if (itemProto === Plugin.prototype) { + descriptor = { + ...descriptor, + length: { + value: data.__mimeTypes.length, + writable: false, + enumerable: false, + configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length` + } + } + } + + // We need to spoof a specific `MimeType` or `Plugin` object + const obj = Object.create(itemProto, descriptor) + + // Virtually all property keys are not enumerable in vanilla + const blacklist = [...Object.keys(data), 'length', 'enabledPlugin'] + return new Proxy(obj, { + ownKeys(target) { + return Reflect.ownKeys(target).filter(k => !blacklist.includes(k)) + }, + getOwnPropertyDescriptor(target, prop) { + if (blacklist.includes(prop)) { + return undefined + } + return Reflect.getOwnPropertyDescriptor(target, prop) + } + }) + } + + const magicArray = [] + + // Loop through our fake data and use that to create convincing entities + dataArray.forEach(data => { + magicArray.push(makeItem(data)) + }) + + // Add direct property access based on types (e.g. `obj['application/pdf']`) afterwards + magicArray.forEach(entry => { + defineProp(magicArray, entry[itemMainProp], entry) + }) + + // This is the best way to fake the type to make sure this is false: `Array.isArray(navigator.mimeTypes)` + const magicArrayObj = Object.create(proto, { + ...Object.getOwnPropertyDescriptors(magicArray), + + // There's one ugly quirk we unfortunately need to take care of: + // The `MimeTypeArray` prototype has an enumerable `length` property, + // but headful Chrome will still skip it when running `Object.getOwnPropertyNames(navigator.mimeTypes)`. + // To strip it we need to make it first `configurable` and can then overlay a Proxy with an `ownKeys` trap. 
+ length: { + value: magicArray.length, + writable: false, + enumerable: false, + configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length` + } + }) + + // Generate our functional function mocks :-) + const functionMocks = fns.generateFunctionMocks()( + proto, + itemMainProp, + magicArray + ) + + // We need to overlay our custom object with a JS Proxy + const magicArrayObjProxy = new Proxy(magicArrayObj, { + get(target, key = '') { + // Redirect function calls to our custom proxied versions mocking the vanilla behavior + if (key === 'item') { + return functionMocks.item + } + if (key === 'namedItem') { + return functionMocks.namedItem + } + if (proto === PluginArray.prototype && key === 'refresh') { + return functionMocks.refresh + } + // Everything else can pass through as normal + return utils.cache.Reflect.get(...arguments) + }, + ownKeys(target) { + // There are a couple of quirks where the original property demonstrates "magical" behavior that makes no sense + // This can be witnessed when calling `Object.getOwnPropertyNames(navigator.mimeTypes)` and the absense of `length` + // My guess is that it has to do with the recent change of not allowing data enumeration and this being implemented weirdly + // For that reason we just completely fake the available property names based on our data to match what regular Chrome is doing + // Specific issues when not patching this: `length` property is available, direct `types` props (e.g. 
`obj['application/pdf']`) are missing + const keys = [] + const typeProps = magicArray.map(mt => mt[itemMainProp]) + typeProps.forEach((_, i) => keys.push(`${i}`)) + typeProps.forEach(propName => keys.push(propName)) + return keys + }, + getOwnPropertyDescriptor(target, prop) { + if (prop === 'length') { + return undefined + } + return Reflect.getOwnPropertyDescriptor(target, prop) + } + }) + + return magicArrayObjProxy + }, + generateFunctionMocks: () => ( + proto, + itemMainProp, + dataArray + ) => ({ + /** Returns the MimeType object with the specified index. */ + item: utils.createProxy(proto.item, { + apply(target, ctx, args) { + if (!args.length) { + throw new TypeError( + `Failed to execute 'item' on '${proto[Symbol.toStringTag] + }': 1 argument required, but only 0 present.` + ) + } + // Special behavior alert: + // - Vanilla tries to cast strings to Numbers (only integers!) and use them as property index lookup + // - If anything else than an integer (including as string) is provided it will return the first entry + const isInteger = args[0] && Number.isInteger(Number(args[0])) // Cast potential string to number first, then check for integer + // Note: Vanilla never returns `undefined` + return (isInteger ? dataArray[Number(args[0])] : dataArray[0]) || null + } + }), + /** Returns the MimeType object with the specified name. */ + namedItem: utils.createProxy(proto.namedItem, { + apply(target, ctx, args) { + if (!args.length) { + throw new TypeError( + `Failed to execute 'namedItem' on '${proto[Symbol.toStringTag] + }': 1 argument required, but only 0 present.` + ) + } + return dataArray.find(mt => mt[itemMainProp] === args[0]) || null // Not `undefined`! + } + }), + /** Does nothing and shall return nothing */ + refresh: proto.refresh + ? 
utils.createProxy(proto.refresh, { + apply(target, ctx, args) { + return undefined + } + }) + : undefined + }) + } + + const data = { + "mimeTypes": [ + { + "type": "application/pdf", + "suffixes": "pdf", + "description": "", + "__pluginName": "Chrome PDF Viewer" + }, + { + "type": "application/x-google-chrome-pdf", + "suffixes": "pdf", + "description": "Portable Document Format", + "__pluginName": "Chrome PDF Plugin" + }, + { + "type": "application/x-nacl", + "suffixes": "", + "description": "Native Client Executable", + "__pluginName": "Native Client" + }, + { + "type": "application/x-pnacl", + "suffixes": "", + "description": "Portable Native Client Executable", + "__pluginName": "Native Client" + } + ], + "plugins": [ + { + "name": "Chrome PDF Plugin", + "filename": "internal-pdf-viewer", + "description": "Portable Document Format", + "__mimeTypes": ["application/x-google-chrome-pdf"] + }, + { + "name": "Chrome PDF Viewer", + "filename": "mhjfbmdgcfjbbpaeojofohoefgiehjai", + "description": "", + "__mimeTypes": ["application/pdf"] + }, + { + "name": "Native Client", + "filename": "internal-nacl-plugin", + "description": "", + "__mimeTypes": ["application/x-nacl", "application/x-pnacl"] + } + ] + } + + // That means we're running headful + const hasPlugins = 'plugins' in navigator && navigator.plugins.length + if (hasPlugins) { + return // nothing to do here + } + + const mimeTypes = fns.generateMimeTypeArray(fns)(data.mimeTypes) + const plugins = fns.generatePluginArray(fns)(data.plugins) + + // Plugin and MimeType cross-reference each other, let's do that now + // Note: We're looping through `data.plugins` here, not the generated `plugins` + for (const pluginData of data.plugins) { + pluginData.__mimeTypes.forEach((type, index) => { + plugins[pluginData.name][index] = mimeTypes[type] + + Object.defineProperty(plugins[pluginData.name], type, { + value: mimeTypes[type], + writable: false, + enumerable: false, // Not enumerable + configurable: true + }) + 
Object.defineProperty(mimeTypes[type], 'enabledPlugin', { + value: + type === 'application/x-pnacl' + ? mimeTypes['application/x-nacl'].enabledPlugin // these reference the same plugin, so we need to re-use the Proxy in order to avoid leaks + : new Proxy(plugins[pluginData.name], {}), // Prevent circular references + writable: false, + enumerable: false, // Important: `JSON.stringify(navigator.plugins)` + configurable: true + }) + }) + } + + const patchNavigator = (name, value) => + utils.replaceProperty(Object.getPrototypeOf(navigator), name, { + get() { + return value + } + }) + + patchNavigator('mimeTypes', mimeTypes) + patchNavigator('plugins', plugins) +})() diff --git a/js/stealth/navigator.vendor.js b/js/stealth/navigator.vendor.js new file mode 100644 index 0000000..16d251c --- /dev/null +++ b/js/stealth/navigator.vendor.js @@ -0,0 +1,10 @@ +// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/navigator.vendor/index.js + +(function () { + utils.init(); + const vendor = "Google Inc." + utils.replaceGetterWithProxy( + Object.getPrototypeOf(navigator), + 'vendor', + utils.makeHandler().getterValue(vendor)) +})() diff --git a/js/stealth/navigator.webdriver.js b/js/stealth/navigator.webdriver.js new file mode 100644 index 0000000..181518f --- /dev/null +++ b/js/stealth/navigator.webdriver.js @@ -0,0 +1,5 @@ +(function () { + if (navigator.webdriver) { + delete Object.getPrototypeOf(navigator).webdriver + } +})(); diff --git a/js/stealth/utils.js b/js/stealth/utils.js new file mode 100644 index 0000000..ba94529 --- /dev/null +++ b/js/stealth/utils.js @@ -0,0 +1,580 @@ +// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js + +/** + * A set of shared utility functions specifically for the purpose of modifying native browser APIs without leaving traces. 
+ *
+ * Meant to be passed down in puppeteer and used in the context of the page (everything in here runs in NodeJS as well as a browser).
+ *
+ * Note: If for whatever reason you need to use this outside of `puppeteer-extra`:
+ * Just remove the `module.exports` statement at the very bottom, the rest can be copy pasted into any browser context.
+ *
+ * Alternatively take a look at the `extract-stealth-evasions` package to create a finished bundle which includes these utilities.
+ *
+ */
+var utils = {}
+
+utils.init = () => {
+  utils.preloadCache()
+}
+
+/**
+ * Wraps a JS Proxy Handler and strips its presence from error stacks, in case the traps throw.
+ *
+ * The presence of a JS Proxy can be revealed as it shows up in error stack traces.
+ * Every own property of `handler` is wrapped in a function that forwards the call and,
+ * if the trap throws an error whose stack contains `at ` frames, rewrites `err.stack`
+ * before re-throwing the same error object. Errors without such a stack are re-thrown untouched.
+ *
+ * @param {object} handler - The JS Proxy handler to wrap
+ * @returns {object} A new handler with the wrapped traps (plus a default `setPrototypeOf` trap
+ *   unless `handler` supplies its own)
+ */
+utils.stripProxyFromErrors = (handler = {}) => {
+  const newHandler = {
+    setPrototypeOf: function (target, proto) {
+      if (proto === null)
+        throw new TypeError('Cannot convert object to primitive value')
+      if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {
+        throw new TypeError('Cyclic __proto__ value')
+      }
+      return Reflect.setPrototypeOf(target, proto)
+    }
+  }
+  // We wrap each trap in the handler in a try/catch and modify the error stack if they throw
+  const traps = Object.getOwnPropertyNames(handler)
+  traps.forEach(trap => {
+    newHandler[trap] = function () {
+      try {
+        // Forward the call to the defined proxy handler
+        return handler[trap].apply(this, arguments || [])
+      } catch (err) {
+        // Stack traces differ per browser, we only support chromium based ones currently
+        if (!err || !err.stack || !err.stack.includes(`at `)) {
+          throw err
+        }
+
+        // When something throws within one of our traps the Proxy will show up in error stacks
+        // An earlier implementation of this code would simply strip lines with a blacklist,
+        // but it makes sense to be more surgical here and only remove lines related to our Proxy.
+        // We try to use a known "anchor" line for that and strip it with everything above it.
+        // If the anchor line cannot be found for some reason we fall back to our blacklist approach.
+
+        // Frame produced by this wrapper itself. Chromium names a function assigned through a
+        // computed member (`newHandler[trap] = function () {}`) as `newHandler.<computed>`;
+        // without the `<computed>` token the pattern can never match a real frame.
+        const wrapperFrame = `at Object.newHandler.<computed> [as ${trap}] `
+
+        const stripWithBlacklist = (stack, stripFirstLine = true) => {
+          const blacklist = [
+            `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply
+            `at Object.${trap} `, // e.g. Object.get or Object.apply
+            wrapperFrame // caused by this very wrapper :-)
+          ]
+          return (
+            stack
+              .split('\n')
+              // Drop the first stack frame (index 1, index 0 is the error message) when requested,
+              // as it is guaranteed to be our proxy
+              .filter((line, index) => !(index === 1 && stripFirstLine))
+              // Check if the line starts with one of our blacklisted strings
+              .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))
+              .join('\n')
+          )
+        }
+
+        const stripWithAnchor = (stack, anchor) => {
+          const stackArr = stack.split('\n')
+          anchor = anchor || wrapperFrame // Known first Proxy line in chromium
+          const anchorIndex = stackArr.findIndex(line =>
+            line.trim().startsWith(anchor)
+          )
+          if (anchorIndex === -1) {
+            return false // 404, anchor not found
+          }
+          // Strip everything from the top until we reach the anchor line
+          // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)
+          stackArr.splice(1, anchorIndex)
+          return stackArr.join('\n')
+        }
+
+        // Special cases due to our nested toString proxies
+        err.stack = err.stack.replace(
+          'at Object.toString (',
+          'at Function.toString ('
+        )
+        if ((err.stack || '').includes('at Function.toString (')) {
+          err.stack = stripWithBlacklist(err.stack, false)
+          throw err
+        }
+
+        // Try using the anchor method, fallback to blacklist if necessary
+        err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)
+
+        throw err // Re-throw our now sanitized error
+      }
+    }
+  })
+  return newHandler
+}
+
+/**
+ * Strip error lines from stack traces until (and including) a known line in the stack.
+ * + * @param {object} err - The error to sanitize + * @param {string} anchor - The string the anchor line starts with + */ +utils.stripErrorWithAnchor = (err, anchor) => { + const stackArr = err.stack.split('\n') + const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor)) + if (anchorIndex === -1) { + return err // 404, anchor not found + } + // Strip everything from the top until we reach the anchor line (remove anchor line as well) + // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`) + stackArr.splice(1, anchorIndex) + err.stack = stackArr.join('\n') + return err +} + +/** + * Replace the property of an object in a stealthy way. + * + * Note: You also want to work on the prototype of an object most often, + * as you'd otherwise leave traces (e.g. showing up in Object.getOwnPropertyNames(obj)). + * + * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty + * + * @example + * replaceProperty(WebGLRenderingContext.prototype, 'getParameter', { value: "alice" }) + * // or + * replaceProperty(Object.getPrototypeOf(navigator), 'languages', { get: () => ['en-US', 'en'] }) + * + * @param {object} obj - The object which has the property to replace + * @param {string} propName - The property name to replace + * @param {object} descriptorOverrides - e.g. { value: "alice" } + */ +utils.replaceProperty = (obj, propName, descriptorOverrides = {}) => { + return Object.defineProperty(obj, propName, { + // Copy over the existing descriptors (writable, enumerable, configurable, etc) + ...(Object.getOwnPropertyDescriptor(obj, propName) || {}), + // Add our overrides (e.g. value, get()) + ...descriptorOverrides + }) +} + +/** + * Preload a cache of function copies and data. + * + * For a determined enough observer it would be possible to overwrite and sniff usage of functions + * we use in our internal Proxies, to combat that we use a cached copy of those functions. 
+ * + * Note: Whenever we add a `Function.prototype.toString` proxy we should preload the cache before, + * by executing `utils.preloadCache()` before the proxy is applied (so we don't cause recursive lookups). + * + * This is evaluated once per execution context (e.g. window) + */ +utils.preloadCache = () => { + if (utils.cache) { + return + } + utils.cache = { + // Used in our proxies + Reflect: { + get: Reflect.get.bind(Reflect), + apply: Reflect.apply.bind(Reflect) + }, + // Used in `makeNativeString` + nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }` + } +} + +/** + * Utility function to generate a cross-browser `toString` result representing native code. + * + * There's small differences: Chromium uses a single line, whereas FF & Webkit uses multiline strings. + * To future-proof this we use an existing native toString result as the basis. + * + * The only advantage we have over the other team is that our JS runs first, hence we cache the result + * of the native toString result once, so they cannot spoof it afterwards and reveal that we're using it. + * + * @example + * makeNativeString('foobar') // => `function foobar() { [native code] }` + * + * @param {string} [name] - Optional function name + */ +utils.makeNativeString = (name = '') => { + return utils.cache.nativeToStringStr.replace('toString', name || '') +} + +/** + * Helper function to modify the `toString()` result of the provided object. + * + * Note: Use `utils.redirectToString` instead when possible. + * + * There's a quirk in JS Proxies that will cause the `toString()` result to differ from the vanilla Object. + * If no string is provided we will generate a `[native code]` thing based on the name of the property object. 
+ *
+ * The patch is applied by replacing `Function.prototype.toString` with a Proxy, so it affects
+ * every `toString()` call made on a function in this execution context.
+ *
+ * @example
+ * patchToString(WebGLRenderingContext.prototype.getParameter, 'function getParameter() { [native code] }')
+ *
+ * @param {object} obj - The object for which to modify the `toString()` representation
+ * @param {string} str - Optional string used as a return value
+ */
+utils.patchToString = (obj, str = '') => {
+  const handler = {
+    apply: function (target, ctx) {
+      // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + ""`
+      if (ctx === Function.prototype.toString) {
+        return utils.makeNativeString('toString')
+      }
+      // `toString` targeted at our proxied Object detected
+      if (ctx === obj) {
+        // We either return the optional string verbatim or derive the most desired result automatically
+        return str || utils.makeNativeString(obj.name)
+      }
+      // `Function.prototype.toString.call(undefined)` (or `null`) must surface the error raised by the
+      // original function. Without this guard the `ctx.toString` lookup below would throw a different
+      // TypeError instead. Same guard as in `utils.redirectToString`.
+      if (typeof ctx === 'undefined' || ctx === null) {
+        return target.call(ctx)
+      }
+      // Check if the toString prototype of the context is the same as the global prototype,
+      // if not it indicates that we are doing a check across different windows, e.g. the `iframeWithdirect` test case
+      const hasSameProto = Object.getPrototypeOf(
+        Function.prototype.toString
+      ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins
+      if (!hasSameProto) {
+        // Pass the call on to the local Function.prototype.toString instead
+        return ctx.toString()
+      }
+      return target.call(ctx)
+    }
+  }
+
+  // Wrap the current `Function.prototype.toString` (possibly already proxied by an earlier call)
+  const toStringProxy = new Proxy(
+    Function.prototype.toString,
+    utils.stripProxyFromErrors(handler)
+  )
+  utils.replaceProperty(Function.prototype, 'toString', {
+    value: toStringProxy
+  })
+}
+
+/**
+ * Make all nested functions of an object native.
+ *
+ * @param {object} obj
+ */
+utils.patchToStringNested = (obj = {}) => {
+  return utils.execRecursively(obj, ['function'], utils.patchToString)
+}
+
+/**
+ * Redirect toString requests from one object to another.
+ * + * @param {object} proxyObj - The object that toString will be called on + * @param {object} originalObj - The object which toString result we wan to return + */ +utils.redirectToString = (proxyObj, originalObj) => { + const handler = { + apply: function (target, ctx) { + // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + ""` + if (ctx === Function.prototype.toString) { + return utils.makeNativeString('toString') + } + + // `toString` targeted at our proxied Object detected + if (ctx === proxyObj) { + const fallback = () => + originalObj && originalObj.name + ? utils.makeNativeString(originalObj.name) + : utils.makeNativeString(proxyObj.name) + + // Return the toString representation of our original object if possible + return originalObj + '' || fallback() + } + + if (typeof ctx === 'undefined' || ctx === null) { + return target.call(ctx) + } + + // Check if the toString protype of the context is the same as the global prototype, + // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case + const hasSameProto = Object.getPrototypeOf( + Function.prototype.toString + ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins + if (!hasSameProto) { + // Pass the call on to the local Function.prototype.toString instead + return ctx.toString() + } + + return target.call(ctx) + } + } + + const toStringProxy = new Proxy( + Function.prototype.toString, + utils.stripProxyFromErrors(handler) + ) + utils.replaceProperty(Function.prototype, 'toString', { + value: toStringProxy + }) +} + +/** + * All-in-one method to replace a property with a JS Proxy using the provided Proxy handler with traps. + * + * Will stealthify these aspects (strip error stack traces, redirect toString, etc). + * Note: This is meant to modify native Browser APIs and works best with prototype objects. 
+ * + * @example + * replaceWithProxy(WebGLRenderingContext.prototype, 'getParameter', proxyHandler) + * + * @param {object} obj - The object which has the property to replace + * @param {string} propName - The name of the property to replace + * @param {object} handler - The JS Proxy handler to use + */ +utils.replaceWithProxy = (obj, propName, handler) => { + const originalObj = obj[propName] + const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler)) + + utils.replaceProperty(obj, propName, { value: proxyObj }) + utils.redirectToString(proxyObj, originalObj) + + return true +} +/** + * All-in-one method to replace a getter with a JS Proxy using the provided Proxy handler with traps. + * + * @example + * replaceGetterWithProxy(Object.getPrototypeOf(navigator), 'vendor', proxyHandler) + * + * @param {object} obj - The object which has the property to replace + * @param {string} propName - The name of the property to replace + * @param {object} handler - The JS Proxy handler to use + */ +utils.replaceGetterWithProxy = (obj, propName, handler) => { + const fn = Object.getOwnPropertyDescriptor(obj, propName).get + const fnStr = fn.toString() // special getter function string + const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler)) + + utils.replaceProperty(obj, propName, { get: proxyObj }) + utils.patchToString(proxyObj, fnStr) + + return true +} + +/** + * All-in-one method to replace a getter and/or setter. Functions get and set + * of handler have one more argument that contains the native function. 
+ * + * @example + * replaceGetterSetter(HTMLIFrameElement.prototype, 'contentWindow', handler) + * + * @param {object} obj - The object which has the property to replace + * @param {string} propName - The name of the property to replace + * @param {object} handlerGetterSetter - The handler with get and/or set + * functions + * @see https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty#description + */ +utils.replaceGetterSetter = (obj, propName, handlerGetterSetter) => { + const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName) + const handler = { ...ownPropertyDescriptor } + + if (handlerGetterSetter.get !== undefined) { + const nativeFn = ownPropertyDescriptor.get + handler.get = function() { + return handlerGetterSetter.get.call(this, nativeFn.bind(this)) + } + utils.redirectToString(handler.get, nativeFn) + } + + if (handlerGetterSetter.set !== undefined) { + const nativeFn = ownPropertyDescriptor.set + handler.set = function(newValue) { + handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this)) + } + utils.redirectToString(handler.set, nativeFn) + } + + Object.defineProperty(obj, propName, handler) +} + +/** + * All-in-one method to mock a non-existing property with a JS Proxy using the provided Proxy handler with traps. + * + * Will stealthify these aspects (strip error stack traces, redirect toString, etc). 
+ * + * @example + * mockWithProxy(chrome.runtime, 'sendMessage', function sendMessage() {}, proxyHandler) + * + * @param {object} obj - The object which has the property to replace + * @param {string} propName - The name of the property to replace or create + * @param {object} pseudoTarget - The JS Proxy target to use as a basis + * @param {object} handler - The JS Proxy handler to use + */ +utils.mockWithProxy = (obj, propName, pseudoTarget, handler) => { + const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler)) + + utils.replaceProperty(obj, propName, { value: proxyObj }) + utils.patchToString(proxyObj) + + return true +} + +/** + * All-in-one method to create a new JS Proxy with stealth tweaks. + * + * This is meant to be used whenever we need a JS Proxy but don't want to replace or mock an existing known property. + * + * Will stealthify certain aspects of the Proxy (strip error stack traces, redirect toString, etc). + * + * @example + * createProxy(navigator.mimeTypes.__proto__.namedItem, proxyHandler) // => Proxy + * + * @param {object} pseudoTarget - The JS Proxy target to use as a basis + * @param {object} handler - The JS Proxy handler to use + */ +utils.createProxy = (pseudoTarget, handler) => { + const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler)) + utils.patchToString(proxyObj) + + return proxyObj +} + +/** + * Helper function to split a full path to an Object into the first part and property. 
+ * + * @example + * splitObjPath(`HTMLMediaElement.prototype.canPlayType`) + * // => {objName: "HTMLMediaElement.prototype", propName: "canPlayType"} + * + * @param {string} objPath - The full path to an object as dot notation string + */ +utils.splitObjPath = objPath => ({ + // Remove last dot entry (property) ==> `HTMLMediaElement.prototype` + objName: objPath.split('.').slice(0, -1).join('.'), + // Extract last dot entry ==> `canPlayType` + propName: objPath.split('.').slice(-1)[0] +}) + +/** + * Convenience method to replace a property with a JS Proxy using the provided objPath. + * + * Supports a full path (dot notation) to the object as string here, in case that makes it easier. + * + * @example + * replaceObjPathWithProxy('WebGLRenderingContext.prototype.getParameter', proxyHandler) + * + * @param {string} objPath - The full path to an object (dot notation string) to replace + * @param {object} handler - The JS Proxy handler to use + */ +utils.replaceObjPathWithProxy = (objPath, handler) => { + const { objName, propName } = utils.splitObjPath(objPath) + const obj = eval(objName) // eslint-disable-line no-eval + return utils.replaceWithProxy(obj, propName, handler) +} + +/** + * Traverse nested properties of an object recursively and apply the given function on a whitelist of value types. + * + * @param {object} obj + * @param {array} typeFilter - e.g. `['function']` + * @param {Function} fn - e.g. `utils.patchToString` + */ +utils.execRecursively = (obj = {}, typeFilter = [], fn) => { + function recurse(obj) { + for (const key in obj) { + if (obj[key] === undefined) { + continue + } + if (obj[key] && typeof obj[key] === 'object') { + recurse(obj[key]) + } else { + if (obj[key] && typeFilter.includes(typeof obj[key])) { + fn.call(this, obj[key]) + } + } + } + } + recurse(obj) + return obj +} + +/** + * Everything we run through e.g. `page.evaluate` runs in the browser context, not the NodeJS one. 
+ * That means we cannot just use reference variables and functions from outside code, we need to pass everything as a parameter. + * + * Unfortunately the data we can pass is only allowed to be of primitive types, regular functions don't survive the built-in serialization process. + * This utility function will take an object with functions and stringify them, so we can pass them down unharmed as strings. + * + * We use this to pass down our utility functions as well as any other functions (to be able to split up code better). + * + * @see utils.materializeFns + * + * @param {object} fnObj - An object containing functions as properties + */ +utils.stringifyFns = (fnObj = { hello: () => 'world' }) => { + // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine + // https://github.com/feross/fromentries + function fromEntries(iterable) { + return [...iterable].reduce((obj, [key, val]) => { + obj[key] = val + return obj + }, {}) + } + return (Object.fromEntries || fromEntries)( + Object.entries(fnObj) + .filter(([key, value]) => typeof value === 'function') + .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval + ) +} + +/** + * Utility function to reverse the process of `utils.stringifyFns`. + * Will materialize an object with stringified functions (supports classic and fat arrow functions). 
+ * + * @param {object} fnStrObj - An object containing stringified functions as properties + */ +utils.materializeFns = (fnStrObj = { hello: "() => 'world'" }) => { + return Object.fromEntries( + Object.entries(fnStrObj).map(([key, value]) => { + if (value.startsWith('function')) { + // some trickery is needed to make oldschool functions work :-) + return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval + } else { + // arrow functions just work + return [key, eval(value)] // eslint-disable-line no-eval + } + }) + ) +} + +// Proxy handler templates for re-usability +utils.makeHandler = () => ({ + // Used by simple `navigator` getter evasions + getterValue: value => ({ + apply(target, ctx, args) { + // Let's fetch the value first, to trigger and escalate potential errors + // Illegal invocations like `navigator.__proto__.vendor` will throw here + utils.cache.Reflect.apply(...arguments) + return value + } + }) +}) + +/** + * Compare two arrays. + * + * @param {array} array1 - First array + * @param {array} array2 - Second array + */ +utils.arrayEquals = (array1, array2) => { + if (array1.length !== array2.length) { + return false + } + for (let i = 0; i < array1.length; ++i) { + if (array1[i] !== array2[i]) { + return false + } + } + return true +} + +/** + * Cache the method return according to its arguments. 
+ * + * @param {Function} fn - A function that will be cached + */ +utils.memoize = fn => { + const cache = [] + return function(...args) { + if (!cache.some(c => utils.arrayEquals(c.key, args))) { + cache.push({ key: args, value: fn.apply(this, args) }) + } + return cache.find(c => utils.arrayEquals(c.key, args)).value + } +} diff --git a/js/stealth/webgl.vendor.js b/js/stealth/webgl.vendor.js new file mode 100644 index 0000000..1e11e9e --- /dev/null +++ b/js/stealth/webgl.vendor.js @@ -0,0 +1,28 @@ +(function () { + utils.init(); + const getParameterProxyHandler = { + apply: function (target, ctx, args) { + const param = (args || [])[0] + const result = utils.cache.Reflect.apply(target, ctx, args) + // UNMASKED_VENDOR_WEBGL + if (param === 37445) { + return 'Intel Inc.' // default in headless: Google Inc. + } + // UNMASKED_RENDERER_WEBGL + if (param === 37446) { + return 'Intel Iris OpenGL Engine' // default in headless: Google SwiftShader + } + return result + } + } + + // There's more than one WebGL rendering context + // https://developer.mozilla.org/en-US/docs/Web/API/WebGL2RenderingContext#Browser_compatibility + // To find out the original values here: Object.getOwnPropertyDescriptors(WebGLRenderingContext.prototype.getParameter) + const addProxy = (obj, propName) => { + utils.replaceWithProxy(obj, propName, getParameterProxyHandler) + } + // For whatever weird reason loops don't play nice with Object.defineProperty, here's the next best thing: + addProxy(WebGLRenderingContext.prototype, 'getParameter') + addProxy(WebGL2RenderingContext.prototype, 'getParameter') +})() diff --git a/js/stealth/window.outerdimensions.js b/js/stealth/window.outerdimensions.js new file mode 100644 index 0000000..dabf5eb --- /dev/null +++ b/js/stealth/window.outerdimensions.js @@ -0,0 +1,10 @@ +(function () { + try { + if (window.outerWidth && window.outerHeight) { + return // nothing to do here + } + const windowFrame = 85 // probably OS and WM dependent + window.outerWidth = 
window.innerWidth + window.outerHeight = window.innerHeight + windowFrame + } catch (err) { } +})() diff --git a/src/Wrappers/Selenium.php b/src/Wrappers/Selenium.php index 6660482..ffd60bf 100644 --- a/src/Wrappers/Selenium.php +++ b/src/Wrappers/Selenium.php @@ -9,13 +9,23 @@ use Facebook\WebDriver\Remote\RemoteWebDriver; use Facebook\WebDriver\Remote\WebDriverCommand; use Facebook\WebDriver\WebDriverWait; -use SapiStudio\SeleniumStealth\SeleniumStealth; use TikScraper\Helpers\Tokens; class Selenium { private const DEFAULT_DRIVER_URL = "http://localhost:4444"; private const DEFAULT_TIKTOK_URL = "https://www.tiktok.com/feedback/"; + private const SPOOF_JS = [ + "utils.js", + "chrome.app.js", "chrome.csi.js", "chrome.loadtimes.js", "chrome.runtime.js", + "iframe.contentWindow.js", + "media.codecs.js", + "navigator.hardwareConcurrency.js", "navigator.languages.js", "navigator.permissions.js", + "navigator.plugins.js", "navigator.vendor.js", "navigator.webdriver.js", + "webgl.vendor.js", + "window.outerdimensions.js" + ]; + private RemoteWebDriver $driver; function __construct(array $config, Tokens $tokens) { @@ -25,12 +35,15 @@ function __construct(array $config, Tokens $tokens) { "close_when_done" => false ]; + $args = ["--disable-blink-features=AutomationControlled"]; + // Chrome flags - $args = []; $opts = new ChromeOptions(); + $opts->setExperimentalOption("excludeSwitches", ["enable-automation"]); + if (!$debug) { // Enable headless if not debugging - $args[] = "--headless"; + $args[] = "--headless=new"; } // User defined user agent @@ -45,7 +58,9 @@ function __construct(array $config, Tokens $tokens) { $args[] = "--proxy-server=$proxy"; } - $opts->addArguments($args); + if (count($args) > 0) { + $opts->addArguments($args); + } $cap = DesiredCapabilities::chrome(); $cap->setCapability(ChromeOptions::CAPABILITY_W3C, $opts); @@ -96,15 +111,16 @@ public function getUserAgent(): string { * @return void */ private function _buildSession(string $url, 
DesiredCapabilities $cap, Tokens $tokens): void { - $js = file_get_contents(__DIR__ . "/../../js/fetch.js"); // Create session - $tmpDriver = RemoteWebDriver::create($url, $cap); - $this->driver = (new SeleniumStealth($tmpDriver))->usePhpWebriverClient()->makeStealth(); + $this->driver = RemoteWebDriver::create($url, $cap); - // Inject custom JS code for fetching TikTok's API $devTools = new ChromeDevToolsDriver($this->driver); + $this->_spoof($devTools); + + $fetch = file_get_contents(__DIR__ . "/../../js/fetch.js"); + // Inject custom JS code for fetching TikTok's API $devTools->execute("Page.addScriptToEvaluateOnNewDocument", [ - "source" => $js + "source" => $fetch ]); $this->driver->get(self::DEFAULT_TIKTOK_URL); @@ -134,4 +150,122 @@ private function _getSessions(string $url): array { return $executor->execute($command)->getValue(); } + + private function _spoof(ChromeDevToolsDriver $devTools): void { + foreach (self::SPOOF_JS as $js) { + $js_str = file_get_contents(__DIR__ . '/../../js/stealth/' . 
$js);
+            if ($js_str !== false) {
+                $devTools->execute("Page.addScriptToEvaluateOnNewDocument", [
+                    "source" => $js_str
+                ]);
+            }
+        }
+        $this->_spoofUa($devTools);
+    }
+
+    /**
+     * Overrides the browser's user agent through the CDP `Emulation.setUserAgentOverride` command.
+     *
+     * Starts from the driver's current user agent, replaces "HeadlessChrome" with "Chrome" and
+     * presents desktop Linux as Windows 10. The resulting string is sent together with a fixed
+     * accept-language and a platform label derived from that final user agent.
+     *
+     * @param ChromeDevToolsDriver $devTools DevTools channel used to send the override
+     * @return void
+     */
+    private function _spoofUa(ChromeDevToolsDriver $devTools): void {
+        $ua = $this->getUserAgent();
+        $ua = str_replace("HeadlessChrome", "Chrome", $ua);
+
+        // Spoof Linux
+        if (str_contains($ua, "Linux") && !str_contains($ua, "Android")) {
+            // Only the first parenthesised group carries the OS token; limiting the replacement to one
+            // match keeps later groups such as "(KHTML, like Gecko)" intact.
+            // preg_replace() returns null on failure, in which case the UA is left as it was.
+            $ua = preg_replace("/\(([^)]+)\)/", '(Windows NT 10.0; Win64; x64)', $ua, 1) ?? $ua;
+        }
+
+        // Get version
+        $matches = [];
+        if (str_contains($ua, "Chrome")) {
+            preg_match("/Chrome\/([\d|.]+)/", $ua, $matches);
+        } else {
+            // NOTE(review): this pattern expects a "Name/1.2.3" shaped version string - confirm
+            // that is what the driver capabilities report.
+            preg_match("/\/([\d|.]+)/", (string) $this->driver->getCapabilities()->getVersion(), $matches);
+        }
+        // Empty when nothing matched, instead of reading an undefined index
+        $uaVersion = $matches[1] ?? "";
+
+        // Get platform (str_contains takes the haystack first, the needle second)
+        // NOTE(review): CDP documents `platform` as the value `navigator.platform` should return;
+        // confirm these labels are the intended ones.
+        $platform = match (true) {
+            str_contains($ua, 'Mac OS X') => 'Mac OS X',
+            str_contains($ua, 'Android') => 'Android',
+            str_contains($ua, 'Linux') => 'Linux',
+            default => 'Windows',
+        };
+
+        // NOTE(review): everything from here down to `$ua_rewrite` (brands, OS version, arch, model,
+        // mobile) is computed but not part of the payload sent below - presumably meant for
+        // `userAgentMetadata`; confirm before wiring it in or removing it.
+
+        // Get brands
+        $seed = explode('.', $uaVersion)[0]; // Major chrome version
+        $order = [
+            [0, 1, 2],
+            [0, 2, 1],
+            [1, 0, 2],
+            [1, 2, 0],
+            [2, 0, 1],
+            [2, 1, 0]
+        ][((int) $seed) % 6]; // cast: an empty/non-numeric seed would otherwise throw on `%`
+
+        $escapedChars = [' ', ' ', ';'];
+
+        $char1 = $escapedChars[$order[0]];
+        $char2 = $escapedChars[$order[1]];
+        $char3 = $escapedChars[$order[2]];
+
+        $greaseyBrand = "{$char1}Not{$char2}A{$char3}Brand";
+        $greasedBrandVersionList = [];
+
+        $greasedBrandVersionList[$order[0]] = [
+            "brand" => $greaseyBrand,
+            "version" => "99"
+        ];
+
+        $greasedBrandVersionList[$order[1]] = [
+            "brand" => "Chromium",
+            "version" => $seed
+        ];
+
+        $greasedBrandVersionList[$order[2]] = [
+            "brand" => "Google Chrome",
+            "version" => $seed
+        ];
+
+        // Get OS version; stays empty when the UA names none of the known systems or the
+        // pattern does not match
+        $matches = [];
+        if (str_contains($ua, 'Mac OS X ')) {
+            preg_match("/Mac OS X ([^)]+)/", $ua, $matches);
+        } else if (str_contains($ua, 'Android ')) {
+            preg_match("/Android ([^;]+)/", $ua, $matches);
+        } else if (str_contains($ua, 'Windows ')) {
+            preg_match("/Windows .*?([\d|.]+);?/", $ua, $matches);
+        }
+        $os_version = $matches[1] ?? '';
+
+        $arch = '';
+        $model = '';
+        $mobile = str_contains($ua, 'Android');
+        if ($mobile) {
+            $matches = [];
+
+            preg_match("/Android.*?;\s([^)]+)/", $ua, $matches);
+            $model = $matches[1] ?? '';
+        } else {
+            $arch = 'x86';
+        }
+
+        $ua_rewrite = [
+            'userAgent' => $ua,
+            'acceptLanguage' => 'en-US,en',
+            'platform' => $platform
+        ];
+
+        $devTools->execute('Emulation.setUserAgentOverride', $ua_rewrite);
+    }
 }