Skip to content

Commit

Permalink
using puppeteer-extra-plugin-stealth implementation in PHP
Browse files Browse the repository at this point in the history
  • Loading branch information
pablouser1 committed Aug 21, 2024
1 parent 3d1ebc6 commit 94c7ea5
Show file tree
Hide file tree
Showing 21 changed files with 1,809 additions and 44 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ $api = new \TikScraper\Api([
'verify_fp' => 'verify_...', // Cookie used for skipping captcha requests
'device_id' => '596845...', // Custom device id
'user_agent' => 'YOUR_CUSTOM_USER_AGENT_HERE',
'proxy' => 'http://user:password@hostname:port',
'cookie_path' => '/your/custom/path/here/tiktok.json' // Path to store Guzzle's cookies, defaults to /tmp/tiktok.json
'proxy' => 'http://user:password@hostname:port'
], $cacheEngine);

$tag = $api->hashtag('funny');
Expand Down Expand Up @@ -46,6 +45,7 @@ TikScrapperPHP supports caching requests, to use it you need to implement [ICach

HUGE thanks to the following projects, this wouldn't be possible without their help

* [puppeteer-extra-plugin-stealth](https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth), which this library ports to PHP
* [TikTok-API-PHP](https://github.com/ssovit/TikTok-API-PHP)
* [TikTok-Api](https://github.com/davidteather/TikTok-Api)
* [tiktok-signature](https://github.com/carcabot/tiktok-signature)
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.6.0.5
2.6.1.0
1 change: 0 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
"require": {
"php": "^8.1",
"php-webdriver/webdriver": "^1.12",
"sapistudio/seleniumstealth": "^1.0",
"guzzlehttp/guzzle": "^7.8"
},
"require-dev": {
Expand Down
32 changes: 1 addition & 31 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions js/stealth/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# NOTE
This is a port of [puppeteer-extra-plugin-stealth](https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth) to PHP

`navigator.webdriver` and `user-agent-override` are implemented using PHP code!
77 changes: 77 additions & 0 deletions js/stealth/chrome.app.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/chrome.app/index.js
/**
 * Evasion: installs a mock `window.chrome.app` when the page does not
 * already expose one.
 *
 * Relies on a global `utils` helper object defined outside this file
 * (`init`, `stripErrorWithAnchor`, `patchToStringNested`).
 * Does nothing if `window.chrome.app` already exists.
 */
(function () {
  utils.init()

  if (!window.chrome) {
    // Use the exact property descriptor found in headful Chrome,
    // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`
    Object.defineProperty(window, 'chrome', {
      writable: true,
      enumerable: true,
      configurable: false, // note!
      value: {} // We'll extend that later
    })
  }

  // That means we're running headful and don't need to mock anything
  if ('app' in window.chrome) {
    return // Nothing to do here
  }

  const makeError = {
    // Builds the TypeError thrown when a mocked method receives arguments;
    // the error is post-processed by `utils.stripErrorWithAnchor`.
    ErrorInInvocation: fn => {
      const err = new TypeError(`Error in invocation of app.${fn}()`)
      return utils.stripErrorWithAnchor(
        err,
        `at ${fn} (eval at <anonymous>`
      )
    }
  }

  // There's some static data in that property which doesn't seem to change,
  // we should periodically check for updates:
  // `JSON.stringify(window.chrome.app, null, 2)`
  const STATIC_DATA = {
    isInstalled: false,
    InstallState: {
      DISABLED: 'disabled',
      INSTALLED: 'installed',
      NOT_INSTALLED: 'not_installed'
    },
    RunningState: {
      CANNOT_RUN: 'cannot_run',
      READY_TO_RUN: 'ready_to_run',
      RUNNING: 'running'
    }
  }

  window.chrome.app = {
    ...STATIC_DATA,

    // Replaces the spread `isInstalled` data property with an accessor
    // (same key position, always `false`).
    get isInstalled() {
      return false
    },

    // Each function expression is named after its own property so that
    // `fn.name` matches the key it is exposed under (previously
    // `getIsInstalled` and `runningState` were both named `getDetails`).
    getDetails: function getDetails() {
      if (arguments.length) {
        throw makeError.ErrorInInvocation('getDetails')
      }
      return null
    },
    getIsInstalled: function getIsInstalled() {
      if (arguments.length) {
        throw makeError.ErrorInInvocation('getIsInstalled')
      }
      return false
    },
    runningState: function runningState() {
      if (arguments.length) {
        throw makeError.ErrorInInvocation('runningState')
      }
      return 'cannot_run'
    }
  }
  utils.patchToStringNested(window.chrome.app)
})()
37 changes: 37 additions & 0 deletions js/stealth/chrome.csi.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/chrome.csi/index.js

/**
 * Evasion: installs a mock `window.chrome.csi()` built on the Navigation
 * Timing API v1 (`performance.timing`).
 *
 * Relies on a global `utils` helper object defined outside this file.
 * Bails out without changes when `chrome.csi` already exists or when
 * `performance.timing` is unavailable.
 */
(function () {
  utils.init()

  // Descriptor copied from headful Chrome
  // (`Object.getOwnPropertyDescriptor(window, 'chrome')`).
  const chromeDescriptor = {
    writable: true,
    enumerable: true,
    configurable: false, // note!
    value: {} // Extended further down
  }
  if (!window.chrome) {
    Object.defineProperty(window, 'chrome', chromeDescriptor)
  }

  // Already present: we are running headful, nothing to mock.
  if ('csi' in window.chrome) {
    return
  }

  // The mock needs the Navigation Timing API v1.
  if (!(window.performance && window.performance.timing)) {
    return
  }

  const navTiming = window.performance.timing

  window.chrome.csi = function () {
    return {
      onloadT: navTiming.domContentLoadedEventEnd,
      startE: navTiming.navigationStart,
      pageT: Date.now() - navTiming.navigationStart,
      tran: 15 // Transition type or something
    }
  }
  utils.patchToString(window.chrome.csi)
})()
128 changes: 128 additions & 0 deletions js/stealth/chrome.loadtimes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra-plugin-stealth/evasions/chrome.loadTimes/index.js

/**
 * Evasion: installs a mock `window.chrome.loadTimes()` derived from the
 * Navigation Timing (v1 + v2) and Paint Timing APIs.
 *
 * Relies on a global `utils` helper object defined outside this file.
 * Bails out without changes when `chrome.loadTimes` already exists or
 * when the required performance APIs are unavailable.
 */
(function () {
  utils.init()

  if (!window.chrome) {
    // Use the exact property descriptor found in headful Chrome,
    // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`
    Object.defineProperty(window, 'chrome', {
      writable: true,
      enumerable: true,
      configurable: false, // note!
      value: {} // We'll extend that later
    })
  }

  // That means we're running headful and don't need to mock anything
  if ('loadTimes' in window.chrome) {
    return // Nothing to do here
  }

  // Check that the Navigation Timing API v1 + v2 is available, we need that
  if (
    !window.performance ||
    !window.performance.timing ||
    !window.PerformancePaintTiming
  ) {
    return
  }

  const { performance } = window
  const { timing } = performance

  // Some stuff is not available on about:blank as it requires a navigation
  // to occur, let's harden the code to not fail then:
  const ntEntryFallback = {
    nextHopProtocol: 'h2',
    type: 'other'
  }

  // Returns the current navigation entry, or the static fallback above.
  function getNavigationEntry() {
    return performance.getEntriesByType('navigation')[0] || ntEntryFallback
  }

  // True for HTTP/2 ('h2') and HTTP2+QUIC/39 ('hq') protocol identifiers.
  function isHttp2OrQuic(protocol) {
    return ['h2', 'hq'].includes(protocol)
  }

  // Truncates (does not round) `num` to at most `decimals` decimal places
  // and returns a number; most of the `loadTimes` stuff has 3 decimals.
  // Falls back to returning `num` unchanged when its string form is not a
  // plain decimal (e.g. NaN).
  function truncate(num, decimals) {
    const re = new RegExp(`^-?\\d+(?:\\.\\d{0,${decimals}})?`)
    const match = String(num).match(re)
    return match ? Number(match[0]) : num
  }

  // The API exposes some funky info regarding the connection.
  // Getters are used so values are read when `loadTimes()` is called.
  const protocolInfo = {
    get connectionInfo() {
      return getNavigationEntry().nextHopProtocol
    },
    get npnNegotiatedProtocol() {
      // NPN is deprecated in favor of ALPN, but this implementation returns
      // the HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.
      const { nextHopProtocol } = getNavigationEntry()
      return isHttp2OrQuic(nextHopProtocol) ? nextHopProtocol : 'unknown'
    },
    get navigationType() {
      return getNavigationEntry().type
    },
    get wasAlternateProtocolAvailable() {
      // The Alternate-Protocol header is deprecated in favor of Alt-Svc
      // (https://www.mnot.net/blog/2016/03/09/alt-svc), so technically this
      // should always return false.
      return false
    },
    get wasFetchedViaSpdy() {
      // SPDY is deprecated in favor of HTTP/2, but this implementation
      // returns true for HTTP/2 or HTTP2+QUIC/39 as well.
      return isHttp2OrQuic(getNavigationEntry().nextHopProtocol)
    },
    get wasNpnNegotiated() {
      // NPN is deprecated in favor of ALPN, but this implementation returns
      // true for HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.
      return isHttp2OrQuic(getNavigationEntry().nextHopProtocol)
    }
  }

  // All values are expressed in seconds since the epoch.
  const timingInfo = {
    get firstPaintAfterLoadTime() {
      // This was never actually implemented and always returns 0.
      return 0
    },
    get requestTime() {
      return timing.navigationStart / 1000
    },
    get startLoadTime() {
      return timing.navigationStart / 1000
    },
    get commitLoadTime() {
      return timing.responseStart / 1000
    },
    get finishDocumentLoadTime() {
      return timing.domContentLoadedEventEnd / 1000
    },
    get finishLoadTime() {
      return timing.loadEventEnd / 1000
    },
    get firstPaintTime() {
      // Paint entries report `startTime` in milliseconds relative to
      // `performance.timeOrigin`. The fallback (no paint entry, e.g. on
      // `about:blank`) is expressed in the same unit so that the result
      // equals `loadEventEnd` in seconds; clamp to 0 while `loadEventEnd`
      // has not been set yet.
      const fpEntry = performance.getEntriesByType('paint')[0] || {
        startTime: Math.max(0, timing.loadEventEnd - performance.timeOrigin)
      }
      // Returned as a number, like every other timing field.
      return truncate((fpEntry.startTime + performance.timeOrigin) / 1000, 3)
    }
  }

  window.chrome.loadTimes = function () {
    return {
      ...protocolInfo,
      ...timingInfo
    }
  }
  utils.patchToString(window.chrome.loadTimes)
})()
Loading

0 comments on commit 94c7ea5

Please sign in to comment.