From 7bd9ff9078d707ad5dcf4216500900f739840afd Mon Sep 17 00:00:00 2001 From: Blake Byrnes Date: Mon, 10 Jan 2022 10:22:32 -0500 Subject: [PATCH] fix(fullstack): tests broken on ubuntu --- core/injected-scripts/MouseEvents.ts | 8 +- core/lib/GlobalPool.ts | 4 + core/lib/Resources.ts | 347 ++++++++++++++++++---- core/lib/Session.ts | 16 +- core/lib/Tab.ts | 20 +- core/test/GlobalPool.test.ts | 1 + core/test/resources.test.ts | 27 +- fullstack/test/resources.test.ts | 10 +- mitm/handlers/HeadersHandler.ts | 4 +- mitm/handlers/RequestSession.ts | 8 +- mitm/interfaces/IBrowserRequestMatcher.ts | 6 + mitm/lib/BrowserRequestMatcher.ts | 284 ------------------ mitm/test/basic.test.ts | 37 +-- 13 files changed, 365 insertions(+), 407 deletions(-) create mode 100644 mitm/interfaces/IBrowserRequestMatcher.ts delete mode 100644 mitm/lib/BrowserRequestMatcher.ts diff --git a/core/injected-scripts/MouseEvents.ts b/core/injected-scripts/MouseEvents.ts index bdc15db08..c892b0982 100644 --- a/core/injected-scripts/MouseEvents.ts +++ b/core/injected-scripts/MouseEvents.ts @@ -7,6 +7,10 @@ class MouseEvents { private static targetNodeId: number; private static containerOffset: { x: number; y: number } = { x: 0, y: 0 }; + public static init() { + this.onMousedown = this.onMousedown.bind(this); + } + public static listenFor( nodeId: number, containerOffset: { x: number; y: number }, @@ -32,10 +36,6 @@ class MouseEvents { return visibility; } - public static init() { - this.onMousedown = this.onMousedown.bind(this); - } - public static didTrigger(nodeId: number) { try { if (this.targetNodeId !== nodeId) { diff --git a/core/lib/GlobalPool.ts b/core/lib/GlobalPool.ts index 11e1f9501..219f224ef 100644 --- a/core/lib/GlobalPool.ts +++ b/core/lib/GlobalPool.ts @@ -90,6 +90,10 @@ export default class GlobalPool { closePromises.push(puppetBrowser.close().catch(err => err)); } MitmProxy.close(); + if (this.mitmStartPromise) { + this.mitmStartPromise.then(x => x.close()).catch(() => null); + this.mitmStartPromise = null; + } if (this.mitmServer) { this.mitmServer.close(); this.mitmServer = null; diff --git a/core/lib/Resources.ts b/core/lib/Resources.ts index 91acc817c..a885756ac 100644 --- a/core/lib/Resources.ts +++ b/core/lib/Resources.ts @@ -4,9 +4,10 @@ import { IRequestSessionResponseEvent, } from '@ulixee/hero-mitm/handlers/RequestSession'; import IResourceMeta from '@ulixee/hero-interfaces/IResourceMeta'; +import { CanceledPromiseError } from '@ulixee/commons/interfaces/IPendingWaitEvent'; +import Resolvable from '@ulixee/commons/lib/Resolvable'; import { IBoundLog } from '@ulixee/commons/interfaces/ILog'; import Log from '@ulixee/commons/lib/Logger'; -import BrowserRequestMatcher from '@ulixee/hero-mitm/lib/BrowserRequestMatcher'; import MitmRequestContext from '@ulixee/hero-mitm/lib/MitmRequestContext'; import { IPuppetResourceRequest } from '@ulixee/hero-interfaces/IPuppetNetworkEvents'; import ResourcesTable from '../models/ResourcesTable'; @@ -14,6 +15,11 @@ import Session from './Session'; import { ICookie } from '@ulixee/hero-interfaces/ICookie'; import { Cookie } from 'tough-cookie'; import StorageChangesTable, { IStorageChangesEntry } from '../models/StorageChangesTable'; +import { IPuppetPageEvents } from '@ulixee/hero-interfaces/IPuppetPage'; +import IHttpResourceLoadDetails from '@ulixee/hero-interfaces/IHttpResourceLoadDetails'; +import HeadersHandler from '@ulixee/hero-mitm/handlers/HeadersHandler'; +import IResourceType from '@ulixee/hero-interfaces/IResourceType'; +import Tab from './Tab'; const { log } = Log(module); @@ -25,6 +31,7 @@ export default class Resources { public readonly resourcesById = new Map(); public readonly cookiesByDomain = new Map>(); + private readonly browserRequestIdToTabId = new Map(); private readonly mitmErrorsByUrl = new Map< string, { @@ -33,14 +40,12 @@ export default class Resources { }[] >(); + private readonly mitmRequestsPendingBrowserRequest: IMitmRequestPendingBrowserRequest[] = []; private readonly logger: IBoundLog; private readonly model: ResourcesTable; private readonly cookiesModel: StorageChangesTable; - constructor( - private readonly session: Session, - readonly browserRequestMatcher: BrowserRequestMatcher, - ) { + constructor(private readonly session: Session) { this.model = session.db.resources; this.cookiesModel = session.db.storageChanges; this.logger = log.createChild(module, { @@ -64,10 +69,27 @@ export default class Resources { this.get(resource.id).seenAtCommandId = atCommandId; } + public registerWebsocketHeaders( + tabId: number, + event: IPuppetPageEvents['websocket-handshake'], + ): void { + this.browserRequestIdToTabId.set(event.browserRequestId, tabId); + this.session.mitmRequestSession?.registerWebsocketHeaders(tabId, event); + } + + public cancelPending(): void { + for (const pending of this.mitmRequestsPendingBrowserRequest) { + if (pending.browserRequestedPromise.isResolved) continue; + pending.browserRequestedPromise.reject( + new CanceledPromiseError('Canceling: Mitm Request Session Closing'), + ); + } + } + /////// BROWSER REQUEST ID MAPPING /////////////////////////////////////////////////////////////////////////////////// public getBrowserRequestTabId(browserRequestId: string): number { - return this.browserRequestMatcher.requestIdToTabId.get(browserRequestId); + return this.browserRequestIdToTabId.get(browserRequestId); } public getBrowserRequestLatestResource(browserRequestId: string): IResourceMeta { @@ -99,6 +121,7 @@ export default class Resources { const ctx = MitmRequestContext.createFromPuppetResourceRequest(resourceRequest); const resourceDetails = MitmRequestContext.toEmittedResource(ctx); + resourceDetails.frameId = frameId; if (!resourceRequest.browserServedFromCache) { resourceDetails.body = await getBody(); if (resourceDetails.body) { @@ -109,11 +132,57 @@ export default class Resources { return this.record(tabId, resourceDetails, true); } - public onBrowserResourceLoaded( + /////// BROWSER REQUESTS ///////////////////////////////////////////////////////////////////////////////////////////// + + public onBrowserWillRequest( tabId: number, frameId: number, resource: IPuppetResourceRequest, - ): boolean { + ): IMitmRequestPendingBrowserRequest { + this.browserRequestIdToTabId.set(resource.browserRequestId, tabId); + let pendingRequest = this.findMatchingRequest(resource); + + if ( + pendingRequest && + pendingRequest.browserRequestedPromise.isResolved && + pendingRequest.browserRequestId + ) { + // figure out how long ago this request was + const requestTimeDiff = Math.abs(resource.requestTime - pendingRequest.requestTime); + if (requestTimeDiff > 5e3) pendingRequest = null; + } + + if (!pendingRequest) { + if (!resource.url) return; + pendingRequest = this.createPendingResource(resource); + } + + this.updatePendingResource(resource, pendingRequest, tabId, frameId); + + return pendingRequest; + } + + public onBrowserDidRequest( + tabId: number, + frameId: number, + resource: IPuppetResourceRequest, + ): void { + this.browserRequestIdToTabId.set(resource.browserRequestId, tabId); + + const pendingRequest = this.mitmRequestsPendingBrowserRequest.find( + x => x.browserRequestId === resource.browserRequestId, + ); + if (pendingRequest) { + Object.assign(pendingRequest, getHeaderDetails(resource)); + } + + const mitmResourceNeedsResolve = this.findMatchingRequest(resource, 'hasMitmResourceId'); + if (mitmResourceNeedsResolve && !mitmResourceNeedsResolve.browserRequestedPromise.isResolved) { + this.updatePendingResource(resource, mitmResourceNeedsResolve, tabId, frameId); + } + } + + public onBrowserResourceLoaded(tabId: number, resource: IPuppetResourceRequest): boolean { const knownResource = this.getBrowserRequestLatestResource(resource.browserRequestId); if (knownResource) { if (!knownResource.response) { @@ -122,39 +191,17 @@ export default class Resources { knownResource.response = resourceDetails.response; } knownResource.response.browserLoadedTime = resource.browserLoadedTime; - knownResource.frameId ??= frameId; this.model.updateReceivedTime(knownResource.id, resource.browserLoadedTime); return true; } - const isMitmError = this.matchesMitmError( + return this.matchesMitmError( tabId, resource.url.href, resource.method, resource.browserRequestId, resource.requestTime, ); - if (isMitmError) { - return true; - } - - return false; - } - - public onBrowserWillRequest( - tabId: number, - frameId: number, - resource: IPuppetResourceRequest, - ): void { - this.browserRequestMatcher.onBrowserRequestedResource(resource, tabId, frameId); - } - - public onBrowserDidRequest( - tabId: number, - frameId: number, - resource: IPuppetResourceRequest, - ): void { - this.browserRequestMatcher.onBrowserRequestedResourceExtraDetails(resource, tabId, frameId); } public onBrowserRequestFailed( @@ -163,13 +210,20 @@ export default class Resources { resource: IPuppetResourceRequest, loadError: Error, ): IResourceMeta { - const resourceId = - this.browserRequestMatcher.onBrowserRequestFailed({ - resource, - tabId, - frameId, - loadError, - }) ?? this.getBrowserRequestLatestResource(resource.browserRequestId)?.id; + this.browserRequestIdToTabId.set(resource.browserRequestId, tabId); + + const pendingRequest = + this.mitmRequestsPendingBrowserRequest.find( + x => x.browserRequestId === resource.browserRequestId, + ) ?? this.findMatchingRequest(resource, 'hasMitmResourceId'); + + let resourceId: number; + if (pendingRequest) { + this.updatePendingResource(resource, pendingRequest, tabId, frameId); + resourceId = pendingRequest.mitmResourceId; + if (resourceId) setTimeout(() => this.requestIsResolved(resourceId), 500).unref(); + } + resourceId ??= this.getBrowserRequestLatestResource(resource.browserRequestId)?.id; // this function will resolve any pending resourceId for a navigation return this.recordFailed( @@ -179,9 +233,90 @@ export default class Resources { ); } + /////// MITM REQUESTS //////////////////////////////////////////////////////////////////////////////////////////////// + + public onMitmRequest(request: IRequestSessionRequestEvent): void { + this.logger.info('MitmRequest', { + url: request.url.href, + method: request.request.method, + id: request.id, + }); + // don't know the tab id at this point + this.record(null, request, false); + } + + public determineResourceType(mitmResource: IHttpResourceLoadDetails): void { + const pendingBrowserRequest = + this.findMatchingRequest(mitmResource, 'noMitmResourceId') ?? + // if no request from browser (and unmatched), queue a new one + this.createPendingResource(mitmResource); + + pendingBrowserRequest.mitmResourceId = mitmResource.id; + pendingBrowserRequest.isHttp2Push = mitmResource.isHttp2Push; + + // NOTE: shared workers do not auto-register with chrome as of chrome 83, so we won't get a matching browserRequest + if ( + HeadersHandler.isWorkerDest(mitmResource, 'shared', 'service') || + mitmResource.resourceType === 'Websocket' + ) { + pendingBrowserRequest.browserRequestedPromise.resolve(null); + } + + mitmResource.browserHasRequested = pendingBrowserRequest.browserRequestedPromise.promise + .then(() => { + // eslint-disable-next-line promise/always-return + if (!pendingBrowserRequest?.browserRequestId) return; + mitmResource.resourceType = pendingBrowserRequest.resourceType; + mitmResource.browserRequestId = pendingBrowserRequest.browserRequestId; + mitmResource.hasUserGesture = pendingBrowserRequest.hasUserGesture; + mitmResource.documentUrl = pendingBrowserRequest.documentUrl; + mitmResource.browserFrameId = pendingBrowserRequest.frameId; + }) + .catch(() => null); + } + + public onMitmRequestError( + tabId: number, + event: IRequestSessionHttpErrorEvent, + error: Error, + ): IResourceMeta { + const request = event.request; + const resource = this.resourceEventToMeta(tabId, request); + this.model.insert(tabId, resource, null, request, error); + + if (!this.resourcesById.has(resource.id)) { + this.resourcesById.set(resource.id, resource); + } + + const url = resource.request?.url; + if (!request.browserRequestId && url) { + const existing = this.mitmErrorsByUrl.get(url) ?? []; + existing.push({ + resourceId: resource.id, + event, + }); + this.mitmErrorsByUrl.set(url, existing); + } + return resource; + } + + public onMitmResponse(response: IRequestSessionResponseEvent, defaultTab: Tab): IResourceMeta { + const tabId = this.getBrowserRequestTabId(response.browserRequestId); + const resource = this.record(tabId ?? defaultTab?.id, response, true); + this.requestIsResolved(resource.id); + return resource; + } + + private requestIsResolved(resourceId: number): void { + const matchIdx = this.mitmRequestsPendingBrowserRequest.findIndex( + x => x.mitmResourceId === resourceId, + ); + if (matchIdx >= 0) this.mitmRequestsPendingBrowserRequest.splice(matchIdx, 1); + } + /////// STORAGE ////////////////////////////////////////////////////////////////////////////////////////////////////// - public record( + private record( tabId: number, resourceEvent: IRequestSessionResponseEvent | IRequestSessionRequestEvent, isResponse: boolean, @@ -200,7 +335,7 @@ export default class Resources { return resource; } - public recordFailed( + private recordFailed( tabId: number, resourceFailedEvent: IRequestSessionResponseEvent, error: Error, @@ -272,31 +407,92 @@ export default class Resources { } } - /////// MITM ERRORS + /////// MITM <-> BROWSER MATCHING //////////////////////////////////////////////////////////////////////////////////// - public onMitmRequestError( + private updatePendingResource( + httpResourceLoad: IPuppetResourceRequest, + browserRequest: IMitmRequestPendingBrowserRequest, tabId: number, - event: IRequestSessionHttpErrorEvent, - error: Error, - ): IResourceMeta { - const request = event.request; - const resource = this.resourceEventToMeta(tabId, request); - this.model.insert(tabId, resource, null, request, error); + frameId: number, + ): void { + browserRequest.tabId ??= tabId; + browserRequest.frameId ??= frameId; + browserRequest.browserLoadedTime ??= httpResourceLoad.browserLoadedTime; + browserRequest.browserRequestId = httpResourceLoad.browserRequestId; + browserRequest.documentUrl = httpResourceLoad.documentUrl; + browserRequest.resourceType = httpResourceLoad.resourceType; + browserRequest.hasUserGesture = httpResourceLoad.hasUserGesture; + browserRequest.browserRequestedPromise.resolve(); + } - if (!this.resourcesById.has(resource.id)) { - this.resourcesById.set(resource.id, resource); - } + private createPendingResource( + request: Pick, + ): IMitmRequestPendingBrowserRequest { + const resource: IMitmRequestPendingBrowserRequest = { + url: request.url.href, + method: request.method, + requestTime: request.requestTime, + browserRequestedPromise: new Resolvable( + 5e3, + `BrowserRequestMatcher.ResourceNotResolved: ${request.method}: ${request.url}`, + ), + ...getHeaderDetails(request), + } as IMitmRequestPendingBrowserRequest; + this.mitmRequestsPendingBrowserRequest.push(resource); + return resource; + } - const url = resource.request?.url; - if (!request.browserRequestId && url) { - const existing = this.mitmErrorsByUrl.get(url) ?? []; - existing.push({ - resourceId: resource.id, - event, + private findMatchingRequest( + resourceToMatch: IPuppetResourceRequest, + filter?: 'noMitmResourceId' | 'hasMitmResourceId', + ): IMitmRequestPendingBrowserRequest | null { + const { method } = resourceToMatch; + const url = resourceToMatch.url?.href; + if (!url) return; + let matches = this.mitmRequestsPendingBrowserRequest.filter(x => { + return x.url === url && x.method === method; + }); + + if (resourceToMatch.browserRequestId) { + matches = matches.filter(x => { + if (x.browserRequestId) return x.browserRequestId === resourceToMatch.browserRequestId; + return true; }); - this.mitmErrorsByUrl.set(url, existing); } - return resource; + + if (filter === 'noMitmResourceId') { + matches = matches.filter(x => !x.mitmResourceId); + } + if (filter === 'hasMitmResourceId') { + matches = matches.filter(x => !!x.mitmResourceId); + } + + // if http2 push, we don't know what referer/origin headers the browser will use + // NOTE: we do this because it aligns the browserRequestId. We don't need header info + const h2Push = matches.find(x => x.isHttp2Push); + if (h2Push) return h2Push; + if (resourceToMatch.isHttp2Push && matches.length) return matches[0]; + + if (method === 'OPTIONS') { + const origin = HeadersHandler.getRequestHeader(resourceToMatch, 'origin'); + return matches.find(x => x.origin === origin); + } + + // if we have sec-fetch-dest headers, make sure they match + const secDest = HeadersHandler.getRequestHeader(resourceToMatch, 'sec-fetch-dest'); + if (secDest) { + matches = matches.filter(x => x.secFetchDest === secDest); + } + // if we have sec-fetch-dest headers, make sure they match + const secSite = HeadersHandler.getRequestHeader(resourceToMatch, 'sec-fetch-site'); + if (secSite) { + matches = matches.filter(x => x.secFetchSite === secSite); + } + + if (matches.length === 1) return matches[0]; + // otherwise, use referer + const referer = HeadersHandler.getRequestHeader(resourceToMatch, 'referer'); + return matches.find(x => x.referer === referer); } private recordCookies(tabId: number, responseEvent: IRequestSessionResponseEvent): void { @@ -437,6 +633,39 @@ export default class Resources { } } +function getHeaderDetails(httpResourceLoad: Pick): { + origin: string; + referer: string; + secFetchDest: string; + secFetchSite: string; +} { + const origin = HeadersHandler.getRequestHeader(httpResourceLoad, 'origin'); + const referer = HeadersHandler.getRequestHeader(httpResourceLoad, 'referer'); + const secFetchDest = HeadersHandler.getRequestHeader(httpResourceLoad, 'sec-fetch-dest'); + const secFetchSite = HeadersHandler.getRequestHeader(httpResourceLoad, 'sec-fetch-site'); + return { origin, referer, secFetchDest, secFetchSite }; +} + +interface IMitmRequestPendingBrowserRequest { + url: string; + method: string; + origin: string; + secFetchSite: string; + secFetchDest: string; + referer: string; + requestTime: number; + browserRequestedPromise: Resolvable; + tabId?: number; + frameId?: number; + mitmResourceId?: number; + browserLoadedTime?: number; + browserRequestId?: string; + resourceType?: IResourceType; + documentUrl?: string; + hasUserGesture?: boolean; + isHttp2Push?: boolean; +} + function areCookiesEqual(a: ICookie, b: ICookie): boolean { if ((a && !b) || (b && !a)) return false; if (a.name !== b.name) return false; diff --git a/core/lib/Session.ts b/core/lib/Session.ts index 64dcfd333..b6bc560db 100644 --- a/core/lib/Session.ts +++ b/core/lib/Session.ts @@ -180,14 +180,15 @@ export default class Session SessionsDb.find().recordSession(this); + this.resources = new Resources(this); this.mitmRequestSession = new RequestSession( this.id, this.plugins, this.options.upstreamProxyUrl, + this.resources, ); this.mitmRequestSession.respondWithHttpErrorStacks = this.mode === 'development' && this.options.showBrowserInteractions === true; - this.resources = new Resources(this, this.mitmRequestSession.browserRequestMatcher); this.websocketMessages = new WebsocketMessages(this.db); this.commands = new Commands(this.db); this.commandRecorder = new CommandRecorder(this, this, null, null, [ @@ -529,19 +530,12 @@ export default class Session } private onMitmRequest(event: IRequestSessionRequestEvent): void { - // don't know the tab id at this point - this.resources.record(null, event, false); + this.resources.onMitmRequest(event); } private onMitmResponse(event: IRequestSessionResponseEvent): void { - const tabId = this.resources.getBrowserRequestTabId(event.browserRequestId); - let tab = this.tabsById.get(tabId); - if (!tab && !tabId) { - // if we can't place it, just use the first active tab - tab = [...this.tabsById.values()].find(x => !x.isClosing); - } - - const resource = this.resources.record(tab?.id ?? tabId, event, true); + const resource = this.resources.onMitmResponse(event, this.getLastActiveTab()); + const tab = this.tabsById.get(resource.tabId); if (!event.didBlockResource) { tab?.emit('resource', resource); } diff --git a/core/lib/Tab.ts b/core/lib/Tab.ts index 2c1f11291..f0ede02b9 100644 --- a/core/lib/Tab.ts +++ b/core/lib/Tab.ts @@ -193,13 +193,11 @@ export default class Tab ): boolean { if (resource.type !== 'Document') return; - const frame = resource.frameId - ? this.frameEnvironmentsById.get(resource.frameId) - : this.frameWithPendingNavigation( - browserRequestId, - resource.request?.url, - resource.response?.url, - ); + const frame = this.frameWithPendingNavigation( + browserRequestId, + resource.request?.url, + resource.response?.url, + ); if (frame && !resource.isRedirect) { frame.navigations.onResourceLoaded(resource.id, resource.response?.statusCode, error); return true; @@ -823,7 +821,7 @@ export default class Tab // websockets page.on('websocket-handshake', ev => { - this.session.mitmRequestSession?.registerWebsocketHeaders(this.id, ev); + this.session.resources.registerWebsocketHeaders(this.id, ev); }); page.on('websocket-frame', this.onWebsocketFrame.bind(this)); } @@ -947,11 +945,7 @@ export default class Tab } } - const isKnownResource = this.session.resources.onBrowserResourceLoaded( - this.id, - frame.id, - resource, - ); + const isKnownResource = this.session.resources.onBrowserResourceLoaded(this.id, resource); if ( !isKnownResource && diff --git a/core/test/GlobalPool.test.ts b/core/test/GlobalPool.test.ts index 053c206f3..0a3c86062 100644 --- a/core/test/GlobalPool.test.ts +++ b/core/test/GlobalPool.test.ts @@ -61,6 +61,7 @@ describe('GlobalPool tests', () => { await Promise.all([tab1.session.close(), tab2.session.close(), tab3.session.close()]); expect(GlobalPool.activeSessionCount).toBe(0); + await Core.shutdown(); }, 15e3); it('should emit events when all session tabs are closed', async () => { diff --git a/core/test/resources.test.ts b/core/test/resources.test.ts index a728065bf..56063b53f 100644 --- a/core/test/resources.test.ts +++ b/core/test/resources.test.ts @@ -14,18 +14,29 @@ afterEach(Helpers.afterEach); test('loads http2 resources', async () => { const server = await Helpers.runHttp2Server((req, res) => { + if (req.url === '/img.png') { + // NOTE: chrome will still request this even though it's pushed + return res.destroy(); + } + res.stream.pushStream( + { + ':path': '/img.png', + ':method': 'GET', + }, + (err, pushStream) => { + pushStream.respond({ + ':status': 200, + 'content-type': 'image/png', + 'content-length': Buffer.byteLength(Helpers.getLogo()), + }); + pushStream.end(Helpers.getLogo()); + }, + ); res.stream.respond({ ':status': 200, 'content-type': 'text/html', }); - res.stream.pushStream({ ':path': '/img.png' }, (err, pushStream) => { - pushStream.respond({ - ':status': 200, - 'content-type': 'image/png', - }); - pushStream.end(Helpers.getLogo()); - }); - res.end(``); + res.stream.end(``); }); const meta = await connection.createSession(); diff --git a/fullstack/test/resources.test.ts b/fullstack/test/resources.test.ts index 2bb127406..1436a6f0d 100644 --- a/fullstack/test/resources.test.ts +++ b/fullstack/test/resources.test.ts @@ -13,7 +13,8 @@ beforeAll(async () => { `; @@ -52,11 +53,16 @@ describe('basic resource tests', () => { const elem = await hero.document.querySelector('a'); const startCommandId = await hero.lastCommandId; await hero.click(elem); - await hero.click(elem); const resources = await hero.waitForResource({ type: 'Fetch' }); expect(resources).toHaveLength(1); + await hero.interact({ move: elem }); + await hero.click(elem); + + const resources2 = await hero.waitForResource({ type: 'Fetch' }); + expect(resources2).toHaveLength(1); + let counter = 0; const allResources = await hero.waitForResource( { diff --git a/mitm/handlers/HeadersHandler.ts b/mitm/handlers/HeadersHandler.ts index 1acfaeabc..321a359f4 100644 --- a/mitm/handlers/HeadersHandler.ts +++ b/mitm/handlers/HeadersHandler.ts @@ -48,11 +48,10 @@ export default class HeadersHandler { if (hasUserActivity === '?1') ctx.hasUserGesture = true; if (fetchMode) ctx.isUserNavigation = isDocumentNavigation && ctx.hasUserGesture; - const requestedResource = session.browserRequestMatcher.onMitmRequestedResource(ctx); + session.browserRequestMatcher.determineResourceType(ctx); if (ctx.resourceType === 'Websocket') { ctx.browserRequestId = await session.getWebsocketUpgradeRequestId(requestHeaders); - requestedResource.browserRequestedPromise.resolve(null); } else if (!ctx.resourceType || ctx.resourceType === 'Fetch') { // if fetch, we need to wait for the browser request so we can see if we should use xhr order or fetch order await ctx.browserHasRequested; @@ -204,7 +203,6 @@ export default class HeadersHandler { } } - const headerCharRegex = /[^\t\x20-\x7e\x80-\xff]/; /** * True if val contains an invalid field-vchar diff --git a/mitm/handlers/RequestSession.ts b/mitm/handlers/RequestSession.ts index 956dc84c5..dae9d9335 100644 --- a/mitm/handlers/RequestSession.ts +++ b/mitm/handlers/RequestSession.ts @@ -17,7 +17,7 @@ import MitmRequestAgent from '../lib/MitmRequestAgent'; import IMitmRequestContext from '../interfaces/IMitmRequestContext'; import { Dns } from '../lib/Dns'; import ResourceState from '../interfaces/ResourceState'; -import BrowserRequestMatcher from '../lib/BrowserRequestMatcher'; +import IBrowserRequestMatcher from '../interfaces/IBrowserRequestMatcher'; const { log } = Log(module); @@ -48,7 +48,6 @@ export default class RequestSession extends TypedEventEmitter(); diff --git a/mitm/interfaces/IBrowserRequestMatcher.ts b/mitm/interfaces/IBrowserRequestMatcher.ts new file mode 100644 index 000000000..2613fafe5 --- /dev/null +++ b/mitm/interfaces/IBrowserRequestMatcher.ts @@ -0,0 +1,6 @@ +import IHttpResourceLoadDetails from '@ulixee/hero-interfaces/IHttpResourceLoadDetails'; + +export default interface IBrowserRequestMatcher { + cancelPending(): void; + determineResourceType(resource: IHttpResourceLoadDetails): void; +} diff --git a/mitm/lib/BrowserRequestMatcher.ts b/mitm/lib/BrowserRequestMatcher.ts deleted file mode 100644 index b7cb18d72..000000000 --- a/mitm/lib/BrowserRequestMatcher.ts +++ /dev/null @@ -1,284 +0,0 @@ -import IResourceType from '@ulixee/hero-interfaces/IResourceType'; -import IResolvablePromise from '@ulixee/commons/interfaces/IResolvablePromise'; -import { IBoundLog } from '@ulixee/commons/interfaces/ILog'; -import Log from '@ulixee/commons/lib/Logger'; -import { CanceledPromiseError } from '@ulixee/commons/interfaces/IPendingWaitEvent'; -import Resolvable from '@ulixee/commons/lib/Resolvable'; -import { IPuppetResourceRequest } from '@ulixee/hero-interfaces/IPuppetNetworkEvents'; -import IHttpResourceLoadDetails from '@ulixee/hero-interfaces/IHttpResourceLoadDetails'; -import RequestSession from '../handlers/RequestSession'; -import HeadersHandler from '../handlers/HeadersHandler'; - -const { log } = Log(module); - -export default class BrowserRequestMatcher { - public requestIdToTabId = new Map(); - - protected readonly logger: IBoundLog; - - private readonly requestedResources: IRequestedResource[] = []; - - constructor(requestSession: RequestSession) { - this.logger = log.createChild(module, { - sessionId: requestSession.sessionId, - }); - requestSession.on('response', event => this.clearRequest(event.id)); - } - - public cancelPending(): void { - for (const pending of this.requestedResources) { - clearTimeout(pending.resolveTimeout); - pending.browserRequestedPromise.reject( - new CanceledPromiseError('Canceling: Mitm Request Session Closing'), - ); - } - } - - public onMitmRequestedResource(mitmResource: IHttpResourceLoadDetails): IRequestedResource { - const pendingBrowserRequest = - this.findMatchingRequest(mitmResource, 'noMitmResourceId') ?? - // if no request from browser (and unmatched), queue a new one - this.queuePendingBrowserRequest(mitmResource); - - pendingBrowserRequest.mitmResourceId = mitmResource.id; - - // NOTE: shared workers do not auto-register with chrome as of chrome 83, so we won't get a matching browserRequest - if (HeadersHandler.isWorkerDest(mitmResource, 'shared', 'service')) { - pendingBrowserRequest.browserRequestedPromise.resolve(null); - } - - mitmResource.browserHasRequested = pendingBrowserRequest.browserRequestedPromise.promise - .then( - this.copyBrowserRequestAttributesToResource.bind(this, mitmResource, pendingBrowserRequest), - ) - .catch(() => clearTimeout(pendingBrowserRequest.resolveTimeout)); - - return pendingBrowserRequest; - } - - public onBrowserRequestedResourceExtraDetails( - httpResourceLoad: IPuppetResourceRequest, - tabId?: number, - frameId?: number, - ): void { - const match = this.requestedResources.find( - x => x.browserRequestId === httpResourceLoad.browserRequestId, - ); - if (!match) return; - Object.assign(match, getHeaderDetails(httpResourceLoad)); - - const mitmResourceNeedsResolve = this.findMatchingRequest( - httpResourceLoad, - 'hasMitmResourceId', - ); - if (mitmResourceNeedsResolve && !mitmResourceNeedsResolve.browserRequestedPromise.isResolved) { - this.updatePendingResource(httpResourceLoad, mitmResourceNeedsResolve, tabId, frameId); - } - } - - public onBrowserRequestedResource( - httpResourceLoad: IPuppetResourceRequest, - tabId?: number, - frameId?: number, - ): IRequestedResource { - let pendingRequest = this.findMatchingRequest(httpResourceLoad); - - if ( - pendingRequest && - pendingRequest.browserRequestedPromise.isResolved && - pendingRequest.browserRequestId - ) { - // figure out how long ago this request was - const requestTimeDiff = Math.abs(httpResourceLoad.requestTime - pendingRequest.requestTime); - if (requestTimeDiff > 5e3) pendingRequest = null; - } - - if (!pendingRequest) { - if (!httpResourceLoad.url) return; - pendingRequest = this.createPendingResource(httpResourceLoad); - } - - this.updatePendingResource(httpResourceLoad, pendingRequest, tabId, frameId); - - return pendingRequest; - } - - public onBrowserRequestFailed(event: { - resource: IPuppetResourceRequest; - tabId: number; - frameId?: number; - loadError: Error; - }): number { - this.requestIdToTabId.set(event.resource.browserRequestId, event.tabId); - const pendingRequest = - this.requestedResources.find(x => x.browserRequestId === event.resource.browserRequestId) ?? - this.findMatchingRequest(event.resource, 'hasMitmResourceId'); - if (pendingRequest) { - this.updatePendingResource(event.resource, pendingRequest, event.tabId, event.frameId); - const id = pendingRequest.mitmResourceId; - if (id) setTimeout(() => this.clearRequest(id), 500).unref(); - return id; - } - this.logger.warn('BrowserViewOfResourceLoad::Failed', { - ...event, - }); - } - - private updatePendingResource( - httpResourceLoad: IPuppetResourceRequest, - browserRequest: IRequestedResource, - tabId: number, - frameId: number, - ): void { - if (tabId) { - browserRequest.tabId = tabId; - this.requestIdToTabId.set(httpResourceLoad.browserRequestId, tabId); - } - browserRequest.frameId ??= frameId; - browserRequest.browserLoadedTime ??= httpResourceLoad.browserLoadedTime; - browserRequest.browserRequestId = httpResourceLoad.browserRequestId; - browserRequest.documentUrl = httpResourceLoad.documentUrl; - browserRequest.resourceType = httpResourceLoad.resourceType; - browserRequest.hasUserGesture = httpResourceLoad.hasUserGesture; - browserRequest.browserRequestedPromise.resolve(); - } - - private copyBrowserRequestAttributesToResource( - mitmResource: IHttpResourceLoadDetails, - browserRequest: IRequestedResource, - ): void { - if (!browserRequest?.browserRequestId) return; - clearTimeout(browserRequest.resolveTimeout); - mitmResource.resourceType = browserRequest.resourceType; - mitmResource.browserRequestId = browserRequest.browserRequestId; - mitmResource.hasUserGesture = browserRequest.hasUserGesture; - mitmResource.documentUrl = browserRequest.documentUrl; - mitmResource.browserFrameId = browserRequest.frameId; - } - - private clearRequest(resourceId: number): void { - const matchIdx = this.requestedResources.findIndex(x => x.mitmResourceId === resourceId); - if (matchIdx >= 0) this.requestedResources.splice(matchIdx, 1); - } - - private createPendingResource( - request: Pick, - ): IRequestedResource { - const resource: IRequestedResource = { - url: request.url.href, - method: request.method, - requestTime: request.requestTime, - browserRequestedPromise: new Resolvable(), - ...getHeaderDetails(request), - } as IRequestedResource; - this.requestedResources.push(resource); - return resource; - } - - private queuePendingBrowserRequest(mitmResource: IHttpResourceLoadDetails): IRequestedResource { - const pendingRequest = this.createPendingResource(mitmResource); - pendingRequest.mitmResourceId = mitmResource.id; - pendingRequest.isHttp2Push = mitmResource.isHttp2Push; - const toLog = { - request: { - url: pendingRequest.url, - method: pendingRequest.method, - id: pendingRequest.mitmResourceId, - }, - }; - pendingRequest.resolveTimeout = setTimeout(() => { - this.logger.warn('BrowserRequestMatcher.ResourceNotResolved', toLog); - pendingRequest.browserRequestedPromise.reject( - new Error('BrowserRequestMatcher.ResourceNotResolved'), - ); - }, 5e3).unref(); - return pendingRequest; - } - - private findMatchingRequest( - resourceToMatch: IPuppetResourceRequest, - filter?: 'noMitmResourceId' | 'hasMitmResourceId', - ): IRequestedResource | null { - const { method } = resourceToMatch; - const url = resourceToMatch.url?.href; - if (!url) return; - let matches = this.requestedResources.filter(x => { - return x.url === url && x.method === method; - }); - - if (resourceToMatch.browserRequestId) { - matches = matches.filter(x => { - if (x.browserRequestId) return x.browserRequestId === resourceToMatch.browserRequestId; - return true; - }); - } - - if (filter === 'noMitmResourceId') { - matches = matches.filter(x => !x.mitmResourceId); - } - if (filter === 'hasMitmResourceId') { - matches = matches.filter(x => !!x.mitmResourceId); - } - - // if http2 push, we don't know what referer/origin headers the browser will use - // NOTE: we do this because it aligns the browserRequestId. We don't need header info - const h2Push = matches.find(x => x.isHttp2Push); - if (h2Push) return h2Push; - if (resourceToMatch.isHttp2Push && matches.length) return matches[0]; - - if (method === 'OPTIONS') { - const origin = HeadersHandler.getRequestHeader(resourceToMatch, 'origin'); - return matches.find(x => x.origin === origin); - } - - // if we have sec-fetch-dest headers, make sure they match - const secDest = HeadersHandler.getRequestHeader(resourceToMatch, 'sec-fetch-dest'); - if (secDest) { - matches = matches.filter(x => x.secFetchDest === secDest); - } - // if we have sec-fetch-dest headers, make sure they match - const secSite = HeadersHandler.getRequestHeader(resourceToMatch, 'sec-fetch-site'); - if (secSite) { - matches = matches.filter(x => x.secFetchSite === secSite); - } - - if (matches.length === 1) return matches[0]; - // otherwise, use referer - const referer = HeadersHandler.getRequestHeader(resourceToMatch, 'referer'); - return matches.find(x => x.referer === referer); - } -} - -function getHeaderDetails(httpResourceLoad: Pick): { - origin: string; - referer: string; - secFetchDest: string; - secFetchSite: string; -} { - const origin = HeadersHandler.getRequestHeader(httpResourceLoad, 'origin'); - const referer = HeadersHandler.getRequestHeader(httpResourceLoad, 'referer'); - const secFetchDest = HeadersHandler.getRequestHeader(httpResourceLoad, 'sec-fetch-dest'); - const secFetchSite = HeadersHandler.getRequestHeader(httpResourceLoad, 'sec-fetch-site'); - return { origin, referer, secFetchDest, secFetchSite }; -} - -interface IRequestedResource { - url: string; - method: string; - origin: string; - secFetchSite: string; - secFetchDest: string; - referer: string; - requestTime: number; - resolveTimeout?: NodeJS.Timeout; - browserRequestedPromise: IResolvablePromise; - tabId?: number; - frameId?: number; - mitmResourceId?: number; - browserLoadedTime?: number; - browserRequestId?: string; - resourceType?: IResourceType; - documentUrl?: string; - hasUserGesture?: boolean; - isHttp2Push?: boolean; -} diff --git a/mitm/test/basic.test.ts b/mitm/test/basic.test.ts index 37b226e2f..b57868440 100644 --- a/mitm/test/basic.test.ts +++ b/mitm/test/basic.test.ts @@ -2,10 +2,9 @@ import * as http from 'http'; import { IncomingHttpHeaders } from 'http'; import { Helpers } from '@ulixee/hero-testing'; import * as HttpProxyAgent from 'http-proxy-agent'; -import { URL } from 'url'; +import * as Url from 'url'; import { AddressInfo } from 'net'; import * as WebSocket from 'ws'; -import * as Url from 'url'; import { createPromise } from '@ulixee/commons/lib/utils'; import IHttpResourceLoadDetails from '@ulixee/hero-interfaces/IHttpResourceLoadDetails'; import BrowserEmulator from '@ulixee/default-browser-emulator'; @@ -18,6 +17,7 @@ import MitmServer from '../lib/MitmProxy'; import HeadersHandler from '../handlers/HeadersHandler'; import HttpUpgradeHandler from '../handlers/HttpUpgradeHandler'; import { parseRawHeaders } from '../lib/Utils'; +import IBrowserRequestMatcher from '../interfaces/IBrowserRequestMatcher'; const { log } = Log(module); const browserEmulatorId = BrowserEmulator.id; @@ -347,21 +347,13 @@ describe('basic MitM tests', () => { Helpers.needsClosing.push(mitmServer); const proxyHost = `http://localhost:${mitmServer.port}`; - const session = createSession(mitmServer); + const session = createSession(mitmServer, null, { + determineResourceType(resource: IHttpResourceLoadDetails) { + resource.resourceType = 'Document'; + }, + cancelPending() {}, + }); session.plugins.beforeHttpRequest = jest.fn(); - session.browserRequestMatcher.onBrowserRequestedResource( - { - browserRequestId: '25.123', - url: new URL(`${httpServer.url}page1`), - method: 'GET', - resourceType: 'Document', - hasUserGesture: true, - isUserNavigation: true, - requestHeaders: {}, - documentUrl: `${httpServer.url}page1`, - } as IHttpResourceLoadDetails, - 1, - ); const onresponse = jest.fn(); const onError = jest.fn(); session.on('response', onresponse); @@ -391,9 +383,18 @@ describe('basic MitM tests', () => { }); }); -function createSession(mitmProxy: MitmServer, upstreamProxyUrl: string = null) { +function createSession( + mitmProxy: MitmServer, + upstreamProxyUrl: string = null, + browserMatcher?: IBrowserRequestMatcher, +) { const plugins = new CorePlugins({ browserEmulatorId, selectBrowserMeta }, log as IBoundLog); - const session = new RequestSession(`${(sessionCounter += 1)}`, plugins, upstreamProxyUrl); + const session = new RequestSession( + `${(sessionCounter += 1)}`, + plugins, + upstreamProxyUrl, + browserMatcher, + ); mitmProxy.registerSession(session, false); Helpers.needsClosing.push(session);