diff --git a/README.md b/README.md index 1708e61a3..3972095c0 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,10 @@ Browse the [full API docs](https://ulixee.org/docs). ## Contributing +See [how-to-contribute.md](docs/main/Contribute/how-to-contribute.md) for ways to get started. + +This project has a [code of conduct](docs/main/Contribute/code-of-conduct.md). By interacting with this repository, organization, or community you agree to abide by its terms. + We'd love your help in making Hero a better tool. Please don't hesitate to send a pull request. ## License diff --git a/client/lib/DomExtender.ts b/client/lib/DomExtender.ts index 673c64863..7e3e66d52 100644 --- a/client/lib/DomExtender.ts +++ b/client/lib/DomExtender.ts @@ -23,6 +23,7 @@ import XPathResult from 'awaited-dom/impl/official-klasses/XPathResult'; import { createSuperNode } from 'awaited-dom/impl/create'; import { getAwaitedPathAsMethodArg } from './SetupAwaitedHandler'; import { KeyboardShortcuts } from '@ulixee/hero-interfaces/IKeyboardShortcuts'; +import { scriptInstance } from './internal'; const awaitedPathState = StateMachine< any, @@ -34,7 +35,7 @@ interface IBaseExtendNode { $exists: Promise; $isClickable: Promise; $hasFocus: Promise; - $clearValue(): Promise; + $clearInputText(): Promise; $click(verification?: IElementInteractVerification): Promise; $type(...typeInteractions: ITypeInteraction[]): Promise; $waitForExists(options?: { timeoutMs?: number }): Promise; @@ -68,7 +69,10 @@ declare module 'awaited-dom/base/interfaces/official' { interface IHTMLCollection extends IBaseExtendNodeList {} } -type INodeExtensionFns = Omit; +type INodeExtensionFns = Omit< + IBaseExtendNode, + '$isClickable' | '$isVisible' | '$exists' | '$hasFocus' +>; const NodeExtensionFns: INodeExtensionFns = { async $click(verification: IElementInteractVerification = 'elementAtPath'): Promise { const coreFrame = await getCoreFrame(this); @@ -114,14 +118,23 @@ const NodeExtensionFns: INodeExtensionFns = { 
...options, }); }, - async $clearValue(): Promise { + async $clearInputText(): Promise { const { awaitedOptions } = awaitedPathState.getState(this); const coreFrame = await awaitedOptions.coreFrame; - await Interactor.run(coreFrame, [ - { click: this }, - { keyShortcut: KeyboardShortcuts.selectAll }, - { keyPress: KeyboardKey.Backspace }, - ]); + const callsitePath = scriptInstance.getScriptCallsite(); + await coreFrame.coreTab.runFlowCommand( + async () => { + await this.focus(); + await Interactor.run(coreFrame, [ + { keyShortcut: KeyboardShortcuts.selectAll }, + { keyPress: KeyboardKey.Backspace }, + ]); + }, + assert => { + assert(this.value, x => !x); + }, + callsitePath, + ); }, $xpathSelector(selector: string, orderedNodeResults = false): ISuperNode { const { awaitedOptions, awaitedPath } = awaitedPathState.getState(this); @@ -230,6 +243,4 @@ async function getCoreFrame(element: ISuperElement): Promise(NodeExtensionFns, NodeExtensionGetters); extendNodeLists(NodeListExtensionFns); -export { - awaitedPathState, -}; +export { awaitedPathState }; diff --git a/client/lib/FlowCommands.ts b/client/lib/FlowCommands.ts index 4d2abea3d..04b1a8442 100644 --- a/client/lib/FlowCommands.ts +++ b/client/lib/FlowCommands.ts @@ -32,27 +32,29 @@ export default class FlowCommands { flowCommand = this.flowCommands.find( x => x.parentId === parentFlow.id && callsiteJson === JSON.stringify(x.callsitePath), ) as any; + } + if (flowCommand) { flowCommand.retryNumber += 1; return flowCommand; - } else { - flowCommand = new FlowCommand( - this.coreTab, - commandFn, - exitState, - id, - parentFlow, - callsitePath, - options, - ); - this.flowCommands.push(flowCommand); - await this.coreTab.commandQueue.runOutOfBand( - 'Tab.registerFlowCommand', - flowCommand.id, - flowCommand.parentId, - callsitePath, - ); } + flowCommand = new FlowCommand( + this.coreTab, + commandFn, + exitState, + id, + parentFlow, + callsitePath, + options, + ); + this.flowCommands.push(flowCommand); + await 
this.coreTab.commandQueue.runOutOfBand( + 'Tab.registerFlowCommand', + flowCommand.id, + flowCommand.parentId, + callsitePath, + ); + return flowCommand; } diff --git a/docs/main/Advanced/ConnectionToCore.md b/docs/main/Advanced/ConnectionToCore.md index 1149e5075..e3bfc6361 100644 --- a/docs/main/Advanced/ConnectionToCore.md +++ b/docs/main/Advanced/ConnectionToCore.md @@ -20,30 +20,22 @@ const { Hero: FullHero } = require('@ulixee'); There are 2 built-in connections in Hero: -- `Default` - instantiates and connects to a locally install Hero `Core` -- `RemoteConnectionToCore` - takes a host to dial over tcp. See more [here](/docs/advanced/remote) +- `Default` - instantiates and connects to a locally installed Hero `Core` (used by `@ulixee/hero-fullstack`) +- `RemoteConnectionToCore` - takes a host to dial over a WebSocket. See more [here](/docs/advanced/remote) ### Configuration {#configuration} -When you provide a connectionToCore to a [Handler](/docs/basic-interfaces/handler) or [Hero](/docs/basic-interfaces/hero), Hero will accept either an `options` object or a `RemoteConnectionToCore` instance. - -The following methods allow you to configure the `connectionToCore` - -- [hero.configure()](/docs/basic-interfaces/hero#configure) - apply the connection to the default hero, or to a an hero constructed prior to the first connection. -- [new Hero()](/docs/basic-interfaces/hero#constructor) - the new hero will use this connection. -- [new Handler(...connections)](/docs/basic-interfaces/handler#constructor) - a handler takes one or more coreClientConnection options or instances. +When you provide a connectionToCore to a [Hero](/docs/basic-interfaces/hero) instance, Hero will accept either an `options` object or a `ConnectionToCore` instance. ### Options {#options} -The provided settings configure the connection to `Core`. Note: some configurations will apply to all connected Heros and Handlers (`localProxyPortStart`, `sessionsDir`, `replayServerPort`). 
+The provided settings configure the connection to `Core`. Note: some configurations will apply to all connected Heroes (`dataDir`). - options `object`. A set of settings that controls the creation of a "connection" to a `Hero Core`. - host `string`. An optional `hostname:port` url that will be used to establish a connection to a Hero Core running on another machine. If no host is provided, a connection to a "locally" running `Core` will be attempted. - - maxConcurrency `number`. The max number of Heros to allow to be dispatched and created at the same time. Heros are "active" until the dispatchHero callback is complete, or the created Hero is closed. If not provided, this number will match the max allowed by a `Core`. + - maxConcurrency `number`. The max number of Heroes to allow to be dispatched and created at the same time. Heroes are "active" until the dispatchHero callback is complete, or the created Hero is closed. If not provided, this number will match the max allowed by a `Core`. - heroTimeoutMillis `number`. The number of milliseconds to give each Hero in this connection to complete a session. A TimeoutError will be thrown if this time is exceeded. - - localProxyPortStart `number` defaults to `any open port`. Starting internal port to use for the mitm proxy. - - sessionsDir `string` defaults to `os.tmpdir()/.ulixee`. Directory to store session files and mitm certificates. - - replayServerPort `number`. Port to start a live replay server on. Defaults to "any open port". + - dataDir `string` defaults to `os.tmpdir()/.ulixee`. Directory to store session files and mitm certificates. - connection `CoreClientConnection`. A pre-initialized connection to a `Hero Core`. You can use this option to pre-check your connection to a remote connection, or to provide customization to the connection. ## Methods @@ -52,7 +44,7 @@ The provided settings configure the connection to `Core`. Note: some configurati Initializes the connection to the specified core. 
You can use this function if you would like to pre-connect to your remote host and ensure connections are properly established before continuing. -NOTE: this will be automatically called when you use a Handler or Hero and pass in this connection. +NOTE: this will be automatically called when you pass in a connection to Hero. #### **Returns**: `Promise` diff --git a/docs/main/Advanced/Remote.md b/docs/main/Advanced/Remote.md index 5424d8473..b2743b59e 100644 --- a/docs/main/Advanced/Remote.md +++ b/docs/main/Advanced/Remote.md @@ -2,7 +2,7 @@ Hero operates out of the box over WebSockets. You'll eventually want to launch Core on a server where clients can remotely access it. -You'll need a simple script to start the server on the machine where the `ulixee` npm package is installed. Make sure to open the port you allocate on any firewall that a client might have to pass through: +You'll need a simple script to start the server on the machine where the `@ulixee/hero` npm package is installed. Make sure to open the port you allocate on any firewall that a client might have to pass through: ## Setting Up a Server Process @@ -23,7 +23,7 @@ const Core = require('@ulixee/hero-core'); ## Setting Up the Client -Your [Hero](/docs/basic-interfaces/hero) or [Handler](/docs/basic-interfaces/handler) must be configured to point at this Remote Core (and any others you've set up). +Your [Hero](/docs/basic-interfaces/hero) instance must be configured to point at this Remote Core (and any others you've set up). NOTE: you can use the `@ulixee/hero` npm package if you don't want to install a full browser engine on the machine coordinating all your scrapes. That example is shown below. 
@@ -31,8 +31,7 @@ NOTE: you can use the `@ulixee/hero` npm package if you don't want to install a const Hero = require('@ulixee/hero'); (async () => { - const hero = new Hero(); - await hero.configure({ + const hero = new Hero({ connectionToCore: { host: 'localhost:7007', }, diff --git a/docs/main/Advanced/Session.md b/docs/main/Advanced/Session.md index 78fa3f5c2..23190d681 100644 --- a/docs/main/Advanced/Session.md +++ b/docs/main/Advanced/Session.md @@ -8,7 +8,7 @@ Sessions store data into a Sqlite database using a module called SessionState. T By default, session databases are located in `os.tmpdir()\.ulixee`. Tmpdir refers to the NodeJs function in the [OS module](https://nodejs.org/api/os.html#os_os_tmpdir). -You can control the location sessions are stored using the [`sessionsDir`](/docs/overview/configuration#sessions-dir) configuration when starting a Core server. +You can control the location sessions are stored using the [`dataDir`](/docs/overview/configuration#data-dir) configuration when starting a Core server. ### Managing Sessions diff --git a/docs/main/Advanced/UserProfile.md b/docs/main/Advanced/UserProfile.md index 9d59f3a88..194ccff2c 100644 --- a/docs/main/Advanced/UserProfile.md +++ b/docs/main/Advanced/UserProfile.md @@ -24,7 +24,7 @@ const theStoredProfile = await hero.exportUserProfile(); // This browser will be instantiated with all the cookies // dom storage, etc from the prior session. -const heroWithProfile = await handler.createHero({ +const heroWithProfile = new Hero({ userProfile: theStoredProfile, }); ``` diff --git a/docs/main/BasicInterfaces/AwaitedDOM.base.md b/docs/main/BasicInterfaces/AwaitedDOM.base.md index f5ca9ebfe..e3613bae6 100644 --- a/docs/main/BasicInterfaces/AwaitedDOM.base.md +++ b/docs/main/BasicInterfaces/AwaitedDOM.base.md @@ -14,6 +14,8 @@ Supers give you access to all properties and methods of dependent classes. [INTERFACES:Super] +Some helpers are added to the Super classes to make using Hero more intuitive. 
Find a list [here](/docs/basic-interfaces/dom-extenders) + ## Document Interfaces [INTERFACES:Document] diff --git a/docs/main/BasicInterfaces/DomExtenders.md b/docs/main/BasicInterfaces/DomExtenders.md new file mode 100644 index 000000000..af308c47b --- /dev/null +++ b/docs/main/BasicInterfaces/DomExtenders.md @@ -0,0 +1,209 @@ +# DomExtenders + +> Dom Extenders add functionality to the W3C spec AwaitedDom to make using Hero easier. All DomExtenders start with a $. + +## Constructor + +DomExtenders cannot be constructed. They're additions added to the following Super classes and collections. + +#### Nodes: {#super-nodes} + +- [`SuperElement`](/docs/awaited-dom/super-element) +- [`SuperNode`](/docs/awaited-dom/super-node) +- [`SuperHTMLElement`](/docs/awaited-dom/super-html-element) + +#### Collections: {#super-collections} + +- [`SuperNodeList`](/docs/awaited-dom/super-node-list) +- [`SuperHTMLCollection`](/docs/awaited-dom/super-html-collection) + +## Properties + +### node.$exists {#exists} + +Checks if a given node is valid and retrievable in the DOM. This API is used mostly to determine if a querySelector can be resolved. + +``` + await hero.querySelector('.not-in-dom').$exists; // false if not in dom! +``` + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Returns**: `Promise` + +### node.$hasFocus {#has-focus} + +Checks if a given node has focus in the DOM. Useful for form interactions. + +``` + const hasFocus = await hero.querySelector('.field').$hasFocus; + if (!hasFocus) await hero.querySelector('.field').focus(); +``` + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Returns**: `Promise` + +### node.$isClickable {#is-clickable} + +Checks if a given node is visible in the DOM, scrolled into view, and not masked by any other node. Follows the specification of `isClickable` from [tab.getComputedVisibility()](/docs/basic-interfaces/tab#get-computed-visibility). 
+ +Attached to Nodes and Elements ([see list](#super-nodes)). + +``` + await hero.querySelector('.element').$isClickable; +``` + +#### **Returns**: `Promise` + +### node.$isVisible {#is-visible} + +Checks if a given node is visible in the DOM. Follows the specification of `isVisible` from [tab.getComputedVisibility()](/docs/basic-interfaces/tab#get-computed-visibility). + +NOTE: this does not mean the node is scrolled into view. + +``` + await hero.querySelector('.element').$isVisible; +``` + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Returns**: `Promise` + +## Methods + +### node.$clearInputText*()* {#clear-value} + +Clears out the value of an input field by performing a Focus, Select All, and Backspace. + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Returns**: `Promise` + +### node.$click*(verification)* {#click} + +A normal DOM node has a `click()` API on it, but it does not trigger human-like behavior or mouse events resembling the actions of a normal user. For that reason, it can be detected if a given website is looking for it. + +The `$click()` API triggers clicking on the given node using the [Human Emulator](/docs/plugins/human-emulators) functionality. + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Arguments**: + +- verification `elementAtPath` | `exactElement` | `none`. Default `elementAtPath`. Determines what [verification](/docs/basic-interfaces/interactions#click-verification) should be used in this operation. A verification determines how to recover from the node disappearing from the DOM during execution. + +#### **Returns**: `Promise` + +### node.$type*(...typeInteractions)* {#type} + +Perform a typing interaction on the given node. This is a shortcut for `focusing` on an input and then performing `keyboard` operations using the [Human Emulator](/docs/plugins/human-emulators) functionality. 
+ +``` + await hero.querySelector('.field').$type('fill-in', KeyboardKey.Enter); +``` + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Arguments**: + +- typeInteractions `ITypeInteraction[]`. One or more interactions to trigger using the keyboard. TypeInteractions can be strings or `KeyboardKey` values (exported from the Hero client). + +#### **Returns**: `Promise` + +### node.$waitForExists*(options?)* {#wait-for-exists} + +Wait for the given Node "Path" to exist in the DOM. Returns the resolved SuperElement. + +``` + await hero.querySelector('.not.here.yet').$waitForExists(); // waits until this querySelector resolves. +``` + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Arguments**: + +- options `object`. Optional options. + - timeoutMs `number`. The default timeout. + +#### **Returns**: `Promise` + +### node.$waitForClickable*(options?)* {#wait-for-clickable} + +Wait for the given Node "Path" to be clickable in the DOM (visible, scrolled into the viewport and unobstructed). + +NOTE: this API will _not_ scroll a node into view that is offscreen. + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Arguments**: + +- options `object`. Optional options. + - timeoutMs `number`. The default timeout. + +#### **Returns**: `Promise` + +### node.$waitForHidden*(options?)* {#wait-for-hidden} + +Wait for the given Node "Path" to be unavailable in the DOM (not visible in the DOM or does not exist). + +This API can be useful to wait for a modal/popup window to disappear after you click close on it. + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Arguments**: + +- options `object`. Optional options. + - timeoutMs `number`. The default timeout. + +#### **Returns**: `Promise` + +### node.$waitForVisible*(options?)* {#wait-for-visible} + +Wait for the given Node "Path" to be visible in the DOM. 
+ +Visible follows the API defined at: [`tab.getComputedVisibility`](/docs/basic-interfaces/tab#get-computed-visibility) + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Arguments**: + +- options `object`. Optional options. + - timeoutMs `number`. The default timeout. + +#### **Returns**: `Promise` + +### node.$xpathSelector*(selector)* {#xpathSelector} + +Perform an XPath query with this node provided as the "ContextScope". NOTE: you still need to start your XPath with a '.' to indicate you wish to find nested XPaths. + +This is often useful to mix and match with querySelectors when you want to select on Text values of nodes. + +``` + await hero.querySelector('ul').$xpathSelector('.//*[contains(., "LAX")]') +``` + +Attached to Nodes and Elements ([see list](#super-nodes)). + +#### **Arguments**: + +- selector `string`. A valid XPath selector. + +#### **Returns**: `Promise` + +## Collection Methods + +### nodeList.$map*(iteratorFn)* {#map} + +Adds syntactic sugar to run an `Array.map` on the results and await all results. This can be useful to transform results. + +Attached to NodeCollections ([see list](#super-collections)). + +#### **Returns**: `Promise` + +### nodeList.$reduce*(iteratorFn, initialValue)* {#reduce} + +Adds syntactic sugar to run an `Array.reduce` on the results and await a reduced result. This can be useful to transform results. + +Attached to NodeCollections ([see list](#super-collections)). + +#### **Returns**: `Promise` diff --git a/docs/main/BasicInterfaces/Handler.md b/docs/main/BasicInterfaces/Handler.md deleted file mode 100644 index ae210ff2a..000000000 --- a/docs/main/BasicInterfaces/Handler.md +++ /dev/null @@ -1,244 +0,0 @@ -# Handler - -Handlers provide a simple interface to load-balance many concurrent Hero sessions across one or more `Hero Cores`. 
- -```js -import { Handler } from '@ulixee/hero'; - -(async () => { - const handler = new Handler(); - - const hero = handler.createHero(); - await hero.goto('https://ulixee.org'); - - async function getDatasetCost(hero: Hero) { - const dataset = hero.input; - await hero.goto(`https://ulixee.org${dataset.href}`); - const cost = hero.document.querySelector('.cost .large-text'); - hero.output.cost = await cost.textContent; - } - - const links = await hero.document.querySelectorAll('a.DatasetSummary'); - for (const link of links) { - const name = await link.querySelector('.title').textContent; - const href = await link.getAttribute('href'); - handler.dispatchHero(getDatasetCost, { - name, - input: { - name, - href, - }, - }); - } - - const results = await handler.waitForAllDispatches(); - for (const result of results) { - const cost = result.output.cost; - const name = result.input.name; - console.log('Cost of %s is %s', name, cost); - } - await handler.close(); -})(); -``` - -Handlers allow you to queue up actions to take as Heros become available. They'll automatically round-robin between available connections. It's a simple way to complete all your scrapes without overloading the local machine or remote browsers. - -## Constructor - -### new Handler*(...connections)* {#constructor} - -The Handler constructor takes one or more "connections" to `Hero Core` instances. - -`Cores` can be located remotely or in the same process. A remote connection includes a "host" parameter that will be connected to via tcp (and needs to be open on any firewalls). - -Every connection controls how many maximum concurrent Heros should be open at any given time. Requests for Heros will be round-robined between all connections. - -#### **Arguments**: - -Connections can be either: - -- options `object`. A set of settings that controls the creation of a [`connection`](/docs/advanced/connection-to-core#options) to a `Hero Core`. - - host `string`. 
An optional `hostname:port` url that will be used to establish a connection to a Hero Core running on another machine. If no host is provided, a connection to a "locally" running `Core` will be attempted. - - maxConcurrency `number`. The max number of Heros to allow to be dispatched and created at the same time. Heros are "active" until the dispatchHero callback is complete, or the created Hero is closed. If not provided, this number will match the max allowed by a `Core`. - - heroTimeoutMillis `number`. The number of milliseconds to give each Hero in this connection to complete a session. A TimeoutError will be thrown if this time is exceeded. - - localProxyPortStart `number` defaults to `any open port`. Starting internal port to use for the mitm proxy. - - sessionsDir `string` defaults to `os.tmpdir()/.ulixee`. Directory to store session files and mitm certificates. - - replayServerPort `number`. Port to start a live replay server on. Defaults to "any open port". -- connectionToCore [`ConnectionToCore`](/docs/advanced/connection-to-core#options). A pre-initialized connection to a `Hero Core`. You can use this option to pre-check your connection to a remote connection, or to provide customization to the connection. - -```js -const { Handler } = require('@ulixee/hero'); - -(async () => { - const remote = new RemoteConnectionToCore({ - host: '10.10.1.1:1588', - }); - await remote.connect(); - - const handler = new Handler(remote1, { - host: '172.234.22.2:1586', - maxConcurrency: 5, - }); - - const hero = await handler.createHero(); -})(); -``` - -## Properties - -### handler.coreHosts {#core-hosts} - -Readonly property returning the resolved list of coreHosts. - -#### **Returns**: `Promise` - -### handler.defaultHeroOptions {#default-hero-properties} - -Sets default properties to apply to any new Hero created. Accepts any of the configurations that can be provided to [`createHero()`](#create-hero). 
- -#### **Returns**: `IHeroCreateOptions` - -See the [Configuration](/docs/overview/configuration) page for more details on `options` and its defaults. You may also want to explore [BrowserEmulators](/docs/plugins/browser-emulators) and [HumanEmulators](/docs/plugins/human-emulators). - -#### **Type**: [`Tab`](/docs/basic-interfaces/tab) - -## Methods - -### handler.addConnectionToCore*(options | connectionToCore)* {#add-connection} - -Adds a connection to the handler. This method will call connect on the underlying connection. - -Connection arguments are the same as the constructor arguments for a single connection. - -#### **Arguments**: - -Can be either: - -- options `object`. A set of settings that controls the creation of a [`connection`](/docs/advanced/connection-to-core#options) to a `Hero Core`. (see [`constructor`](#constructor)) -- connectionToCore [`ConnectionToCore`](/docs/advanced/connection-to-core#options). A pre-initialized connection to a `Hero Core`. - -#### **Returns**: `Promise` - -### handler.closeConnectionToCore*(coreHost)* {#close-connection} - -Closes and disconnects a connection from core. Heros "in-process" will throw `DisconnectedFromCoreError` on active commands. - -#### **Arguments**: - -- coreHost `string`. The coreHost connection. - -#### **Returns**: `Promise` - -### handler.close*()* {#close} - -Closes all underlying connections. NOTE: this function will "abort" any pending processes. You might want to call [`waitForAllDispatches()`](#wait-for-all-dispatches) first. - -#### **Returns**: `Promise` - -### handler.createHero*(options)* {#create-hero} - -Creates a new [`Hero`](/docs/basic-interfaces/hero) using one of the `Core` connections initialized in this Handler. If there are no connections with availability (based on `maxConcurrency` setting), the returned promise will not return until one is free. 
- -NOTE: when using this method, you must call [`hero.close()`](/docs/basic-interfaces/hero#close) explicitly to allow future Heros to be dispatched or created as needed. - -#### **Arguments**: - -- options `object`. Accepts any of the following: - - name `string`. This is used to generate a unique sessionName. - - browserEmulatorId `string` defaults to `default-browser-emulator`. Chooses the BrowserEmulator plugin which emulates the properties that help Hero look like a normal browser. - - humanEmulatorId `string` defaults to `default-human-emulator`. Chooses the HumanEmulator plugin which drives the mouse/keyboard movements. - - timezoneId `string`. Overrides the host timezone. A list of valid ids are available at [unicode.org](https://unicode-org.github.io/cldr-staging/charts/37/supplemental/zone_tzid.html) - - locale `string`. Overrides the host languages settings (eg, en-US). Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting rules. - - viewport `IViewport`. Sets the emulated screen size, window position in the screen, inner/outer width and height. If not provided, the most popular resolution is used from [statcounter.com](https://gs.statcounter.com/screen-resolution-stats/desktop/united-states-of-america). - - blockedResourceTypes `BlockedResourceType[]`. Controls browser resource loading. Valid options are listed [here](/docs/overview/configuration#blocked-resources). - - userProfile `IUserProfile`. Previous user's cookies, session, etc. - - showReplay `boolean`. Whether or not to show the Replay UI. Can also be set with an env variable: `HERO_SHOW_REPLAY=true`. - - input `object`. An object containing properties to attach to the hero (more frequently used with [`dispatchHero`](#dispatch-hero)) - - upstreamProxyUrl `string`. A socks5 or http proxy url (and optional auth) to use for all HTTP requests in this session. 
The optional "auth" should be included in the UserInfo section of the url, eg: `http://username:password@proxy.com:80`. - -See the [Configuration](/docs/overview/configuration) page for more details on `options` and its defaults. You may also want to explore [BrowserEmulators](/docs/plugins/browser-emulators) and [HumanEmulators](/docs/plugins/human-emulators). - -#### **Returns**: [`Promise`](/docs/basic-interfaces/hero) - -```js -const { Handler } = require('@ulixee/hero'); - -(async () => { - const handler = new Handler({ maxConcurrency: 2 }); - - const hero1 = await handler.createHero(); - const hero2 = await handler.createHero(); - - setTimeout(() => hero2.close(), 100); - - // will be available in 100 ms when hero2 closes - const hero3 = await handler.createHero(); -})(); -``` - -### handler.dispatchHero*(callbackFn, createHeroOptions?)* {#dispatch-hero} - -This method allows you queue up functions that should be called as soon as a connection can allocate a new Hero. All configurations available to `createHero` are available here. - -NOTE: you do not need to call close on an Hero when using this method. It will automatically be called when your callback returns. - -On Disconnecting: if a Core is shut-down or the handler closes a coreConnection while work is still in-progress, the hero commands will throw a `DisconnectedFromCoreError`. - -#### **Arguments**: - -- callbackFn `(hero) => Promise`. An asynchronous function that will be passed an initialized [Hero](/docs/basic-interfaces/hero) with the given `createHeroOptions` configuration. -- createHeroOptions `object`. Options used to create a new hero. Takes all options available to [`createHero()`](#create-hero). 
- -#### **Returns**: void - -```js -const { Handler } = require('@ulixee/hero'); - -(async () => { - const handler = new Handler({ maxConcurrency: 2 }); - - handler.dispatchHero( - async hero => { - const { url } = hero.input; - await hero.goto(url); - const links = await hero.document.querySelectorAll('a'); - for (const link of links) { - const href = await link.getAttribute('href'); - handler.dispatchHero( - async hero0 => { - await hero0.goto(hero0.input.link); - const body = await hero0.document.body.textContent; - }, - { input: { href } }, - ); - } - // send in data - }, - { input: { url: 'https://dataliberationfoundation.org' } }, - ); - - // resolves when all dispatched heros are completed or an error occurs - await handler.waitForAllDispatches(); - await handler.close(); -})(); -``` - -### handler.waitForAllDispatches*()* {#wait-for-all-dispatches} - -Waits for all heros which have been created using `dispatchHero` to complete. If any errors are thrown by Heros, the first exception will be thrown upon awaiting this method. - -#### **Returns**: `Promise` - -- DispatchResult - - sessionId `string key`. The session id assigned to the dispatched Hero. - - name `string`. The name assigned to this session. - - input `any`. Any input arguments passed to the dispatched Hero. - - output `any?`. The object set to hero.output if no error thrown. - - error `Error?`. An error if one has been thrown during dispatch. - - options `CreateHeroOptions`. Any arguments passed to the dispatched Hero. - -### handler.waitForAllDispatchesSettled*()* {#wait-for-all-dispatches-settled} - -Waits for all heros which have been created using `dispatchHero` to complete or throw an error. This method will always wait for all dispatches to finish, regardless of errors thrown. This is different from `waitForAllDispatches`, which will throw on any dispatch errors. 
- -#### **Returns**: `Promise` diff --git a/docs/main/BasicInterfaces/Hero.md b/docs/main/BasicInterfaces/Hero.md index 9a9fb4cdd..f0bfe03eb 100644 --- a/docs/main/BasicInterfaces/Hero.md +++ b/docs/main/BasicInterfaces/Hero.md @@ -13,7 +13,7 @@ const Hero = require('@ulixee/hero'); })(); ``` -An Hero instance can be thought of as a single user-browsing session. A default instance is automatically initialized and available as the default export of `ulixee`. Each additional instance you create has the following attributes: +An Hero instance can be thought of as a single user-browsing session. Each instance you create has the following attributes: #### Replayable @@ -21,7 +21,7 @@ An instance has a [replayable](/docs/advanced/session-replay) [Session](/do #### Lightweight -Instances are very lightweight, sharing a pool of browsers underneath. To manage concurrent scrapes in a single script, you can create one Hero for each scrape, or manage load and concurrency with a [Handler](/docs/basic-interfaces/handler). +Instances are very lightweight, sharing a pool of browsers underneath. To manage concurrent scrapes in a single script, you can create one Hero for each scrape. #### Single Active Tab @@ -31,24 +31,6 @@ Hero instances can have multiple [Tabs](/docs/basic-interfaces/tab), but only a Each Hero instance creates a private environment with its own cache, cookies, session data and [BrowserEmulator](/docs/plugins/browser-emulators). No data is shared between instances -- each operates within an airtight sandbox to ensure no identities leak across requests. -## Default Instance {#default} - -A default instance is automatically initialized and available as the default export of `ulixee`. - -The default instance can receive configuration via command line arguments. Any args starting with `--input.*` will be processed. 
The resulting json object is available as [`hero.input`](#input) - -```js -// script.js -const Hero = require('@ulixee/hero'); - -const hero = new Hero(); -console.log(hero.input); // { secret: "true", hero: "true" } -``` - -```bash -$ node script.js --input.secret=true --input.hero=true -``` - ## Constructor ### new Hero*(options)* {#constructor} @@ -78,6 +60,7 @@ const Hero = require('@ulixee/hero'); - userAgent `strong`. This sets your browser's user agent string. Prefixing this string with a tilde (~) allows for dynamic options. - browserEmulatorId `string` defaults to `default-browser-emulator`. Chooses the BrowserEmulator plugin which emulates the properties that help Hero look like a normal browser. - humanEmulatorId `string` defaults to `default-human-emulator`. Chooses the HumanEmulator plugin which drives the mouse/keyboard movements. + - mode `string`. A mode of operation. This variable controls logging levels and whether Hero "tooling" should be activated. Defaults to environment variable `NODE_ENV`. - dnsOverTlsProvider `object`. Configure the host and port to use for DNS over TLS. This feature replicates the Chrome feature that is used if the host DNS provider supports DNS over TLS or DNS over HTTPS. A `null` value will disable this feature. - host `string`. The DNS provider host address. Google=8.8.8.8, Cloudflare=1.1.1.1, Quad9=9.9.9.9. - servername `string`. The DNS provider tls servername. Google=dns.google, Cloudflare=cloudflare-dns.com, Quad9=dns.quad9.net. @@ -97,8 +80,8 @@ const Hero = require('@ulixee/hero'); - positionY? `number`. Optional override browser Y position on screen in pixels (minimum 0, maximum 10000000). - blockedResourceTypes `BlockedResourceType[]`. Controls browser resource loading. Valid options are listed [here](/docs/overview/configuration#blocked-resources). - userProfile `IUserProfile`. Previous user's cookies, session, etc. - - input `object`. An object containing properties to attach to the hero. 
NOTE: if using the default hero, this object will be populated with command line variables starting with `--input.{json path}`. The `{json path}` will be translated into an object set to `hero.input`. - - showReplay `boolean`. Whether or not to show the Replay UI. Can also be set with an env variable: `HERO_SHOW_REPLAY=true`. + - showBrowser `boolean`. A boolean whether to show the Chrome browser window. Can also be set with an env variable: `HERO_SHOW_BROWSER=true`. + - showBrowserInteractions `boolean`. A boolean whether to inject user interactions to mimic headless mouse/keyboard activity - upstreamProxyUrl `string`. A socks5 or http proxy url (and optional auth) to use for all HTTP requests in this session. The optional "auth" should be included in the UserInfo section of the url, eg: `http://username:password@proxy.com:80`. - upstreamProxyIpMask `object`. Optional settings to mask the Public IP Address of a host machine when using a proxy. This is used by the default BrowserEmulator to mask WebRTC IPs. - ipLookupService `string`. The URL of an http based IpLookupService. A list of common options can be found in `plugins/default-browser-emulator/lib/helpers/lookupPublicIp.ts`. Defaults to `ipify.org`. @@ -115,7 +98,7 @@ Returns a reference to the currently active tab. ### hero.coreHost {#core-host} -The connectionToCore host address to which this Hero has connected. This is useful in scenarios where a Handler is round-robining connections between multiple hosts. +The connectionToCore host address to which this Hero has connected. This is useful in scenarios where Hero instances are round-robining between multiple hosts. #### **Type**: `Promise` @@ -133,17 +116,6 @@ Returns a list of [FrameEnvironments](/docs/basic-interfaces/frame-environment) #### **Type**: [`Promise`](/docs/basic-interfaces/frame-environment). -### hero.input {#input} - -Contains the input configuration (if any) for this hero. 
This might come from: - -- [`Handler.dispatchHero`](/docs/basic-interfaces/handler#dispatch-hero) -- or the [default `hero`](#default) - -NOTE: if using the default hero, this object will be populated with command line variables starting with `--input.*`. The parameters will be translated into an object set to `hero.input`. - -#### **Type**: Object - ### hero.isAllContentLoaded {#is-all-content-loaded} `True` if the "load" event has triggered on the active tab. @@ -211,40 +183,12 @@ Retrieves metadata about the hero configuration: - proxyIp `string`. The public IP address of the proxy. - publicIp `string`. The public IP address of the host machine. - userAgentString `string`. The user agent string used in Http requests and within the DOM. +- browserFullVersion `string`. The full version of Chrome (eg, 98.0.4758.102) +- operatingSystemPlatform `string`. The emulated operating system (eg, Windows) +- operatingSystemVersion `string`. The full operating system version (eg, 11.0.1) #### **Type**: `Promise` -### hero.output {#output} - -Hero output is an object used to track any data you collect during your session. Output will be shown in Replay during playback for easy visual playback of data collection. - -Output is able to act like an Array or an Object. It will serialize properly in either use-case. - -NOTE: any object you assign into Output is "copied" into the Output object. You should not expect further changes to the source object to synchronize. - -```js -const Hero = require('@ulixee/hero'); - -(async () => { - const hero = new Hero(); - await hero.goto('https://www.google.com'); - const document = hero.document; - - for (const link of await document.querySelectorAll('a')) { - hero.output.push({ - // will display in Replay UI. - text: await link.textContent, - href: await link.href, - }); - } - - console.log(hero.output); - await hero.close(); -})(); -``` - -#### **Type**: `Output`. An array-like object. 
- ### hero.sessionId {#sessionId} An identifier used for storing logs, snapshots, and other assets associated with the current session. @@ -255,8 +199,6 @@ An identifier used for storing logs, snapshots, and other assets associated with A human-readable identifier of the current Hero session. -You can set this property when calling [Handler.dispatchHero()](/docs/basic-interfaces/handler#dipatch-hero) or [Handler.createHero()](/docs/basic-interfaces/handler#create-hero). - #### **Type**: `Promise` ### hero.tabs {#tabs} @@ -312,32 +254,9 @@ Close a single Tab. The first opened Tab will become the focused tab. Alias for [Tab.close()](/docs/basic-interfaces/tab#close) -### hero.configure*(options)* {#configure} - -Update existing configuration settings. - -#### **Arguments**: - -- options `object` Accepts any of the following: - - userProfile `IUserProfile`. Previous user's cookies, session, etc. - - timezoneId `string`. Overrides the host timezone. A list of valid ids are available at [unicode.org](https://unicode-org.github.io/cldr-staging/charts/37/supplemental/zone_tzid.html) - - locale `string`. Overrides the host languages settings (eg, en-US). Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting rules. - - viewport `IViewport`. Sets the emulated screen size, window position in the screen, inner/outer width. (See constructor for parameters). - - blockedResourceTypes `BlockedResourceType[]`. Controls browser resource loading. Valid options are listed [here](/docs/overview/configuration#blocked-resources). - - upstreamProxyUrl `string`. A socks5 or http proxy url (and optional auth) to use for all HTTP requests in this session. The optional "auth" should be included in the UserInfo section of the url, eg: `http://username:password@proxy.com:80`. - - upstreamProxyIpMask `object`. Optional settings to mask the Public IP Address of a host machine when using a proxy. 
This is used by the default BrowserEmulator to mask WebRTC IPs. - - ipLookupService `string`. The URL of an http based IpLookupService. A list of common options can be found in `plugins/default-browser-emulator/lib/helpers/lookupPublicIp.ts`. Defaults to `ipify.org`. - - proxyIp `string`. The optional IP address of your proxy, if known ahead of time. - - publicIp `string`. The optional IP address of your host machine, if known ahead of time. - - connectionToCore `options | ConnectionToCore`. An object containing `IConnectionToCoreOptions` used to connect, or an already created `ConnectionToCore` instance. Defaults to booting up and connecting to a local `Core`. - -#### **Returns**: `Promise` - -See the [Configuration](/docs/overview/configuration) page for more details on `options` and its defaults. You may also want to explore [BrowserEmulators](/docs/plugins/browser-emulators) and [HumanEmulators](/docs/plugins/human-emulators). - ### hero.exportUserProfile*()* {#export-profile} -Returns a json representation of the underlying browser state for saving. This can later be restored into a new instance using `hero.configure({ userProfile: serialized })`. See the [UserProfile page](/docs/advanced/user-profile) for more details. +Returns a json representation of the underlying browser state for saving. This can later be restored into a new instance using `new Hero({ userProfile: serialized })`. See the [UserProfile page](/docs/advanced/user-profile) for more details. 
#### **Returns**: [`Promise`](/docs/advanced/user-profile) @@ -508,6 +427,14 @@ Alias for [Tab.querySelector](/docs/basic-interfaces/tab#query-selector) Alias for [Tab.querySelectorAll](/docs/basic-interfaces/tab#query-selector-all) +### hero.registerFlowHandler*(name, state, handlerFn)* {#register-flow-handler} + +Alias for [Tab.registerFlowHandler](/docs/basic-interfaces/tab#register-flow-handler) + +### hero.flowCommand*(commandFn, exitState?, options?)* {#flow-command} + +Alias for [Tab.flowCommand](/docs/basic-interfaces/tab#flow-command) + ### hero.reload*(timeoutMs?)* {#reload} Alias for [Tab.reload](/docs/basic-interfaces/tab#reload) @@ -516,6 +443,10 @@ Alias for [Tab.reload](/docs/basic-interfaces/tab#reload) Alias for [Tab.takeScreenshot](/docs/basic-interfaces/tab#take-screenshot) +### hero.validateState*(state)* {#validate-state} + +Alias for [Tab.validateState](/docs/basic-interfaces/tab#validate-state) + ### hero.waitForFileChooser*(options)* {#wait-for-file-chooser} Alias for [Tab.waitForFileChooser()](/docs/basic-interfaces/tab#wait-for-file-chooser) diff --git a/docs/main/BasicInterfaces/Interactions.md b/docs/main/BasicInterfaces/Interactions.md index 516f06e4f..edfe76000 100644 --- a/docs/main/BasicInterfaces/Interactions.md +++ b/docs/main/BasicInterfaces/Interactions.md @@ -58,7 +58,7 @@ All button commands (click, doubleclick, etc) operate on the `Left` button by de hero.interact({ clickRight: [55, 42] }); ``` -#### **ClickVerification**: +#### **ClickVerification**: {#click-verification} Click commands can include a click verification when a [`SuperElement`](/docs/awaited-dom/super-element) is provided as the `MousePosition`. This is the strategy used to confirm that a specific element is clicked after scrolling and moving the mouse over the target. The default verification is `elementAtPath` if none is provided. 
diff --git a/docs/main/BasicInterfaces/Tab.md b/docs/main/BasicInterfaces/Tab.md index dc0e607c6..37751148d 100644 --- a/docs/main/BasicInterfaces/Tab.md +++ b/docs/main/BasicInterfaces/Tab.md @@ -320,6 +320,96 @@ Reload the currently loaded url. #### **Returns**: [`Promise`](/docs/advanced/resource) The loaded resource representing this page. + +### tab.registerFlowHandler*(name, state, handlerFn)* {#register-flow-handler} + +Register a [FlowHandler](/docs/advanced/flow) on the given tab. A FlowHandler is a callback function that will be invoked anytime your Hero script encounters Awaited Dom errors. These can be used to correct your script flow. + +As an example, imagine you are interacting with a website that sometimes pops up an "Accept Cookies" modal. As you don't know "when" it might trigger, it can be difficult to know when to look for the modal. With a FlowHandler, you declare the [State](#wait-for-state) that should trigger the associated callback, and a function to dismiss the cookie popup. + +``` +5. await hero.registerFlowHandler('CookieModal', assert => { +6. assert(hero.querySelector('#cookie-modal').$isVisible); +7. }, +8. async error => { +9. await hero.querySelector('#cookie-modal .dismiss').$click(); +10. }); +``` + +Once registered, your `CookieModal` FlowHandler will be automatically checked anytime an AwaitedDom error occurs. These errors are things like: an element can't be found, an element interaction failed, or waiting for an element [State](#wait-for-state) timed out. + +So, to continue our example, imagine your script is filling out a form field. As you go to click on the field, it can't be clicked because the `CookieModal` has displayed. + +``` +5. await hero.registerFlowHandler('CookieModal', +... +12. await hero.querySelector('#field1').$click(); // FAILS DUE TO OBSTRUCTION +``` + +When your script fails to click on `#field1` (line 12 above), the `CookieModal` handler is checked. 
It matches the current state, and so triggers closing the modal. + +``` +9. await hero.querySelector('#cookie-modal .dismiss').$click(); <--- Closes the modal! +``` + +Now your script is no longer obstructed and will resume clicking on `#field1` (line 12 above). + + +You might find it useful to continue to accumulate FlowHandlers as you encounter edge cases in your script. In the default case, your individual commands will be retried when a FlowHandler can resolve your page state. In more advanced cases, you might find that you need to resume a "block" of activities. To handle these cases, we have [FlowCommands](#flow-command). + +[FlowCommands](#flow-command) are simply ways to group a series of commands together. When an AwaitedDom error occurs, a Flow Command will re-run the entire block. In the example above, your interaction might have many steps. You would want to ensure all steps are run when a failure is encountered. + +``` + await hero.flowCommand(async () => { + await hero.querySelector('#field1').$click(); + await hero.querySelector('#field1').$clearInputText(); + + // Failure here resumes the entire block once a FlowHandler fixes the state + await hero.querySelector('#field1').$type('value'); + }); +``` + +#### **Arguments**: +- name `string`. A required name to give to this FlowHandler. NOTE: many FlowHandlers trigger on generic querySelector strings (eg, .modal.a1-regEU). Without this self-documenting name, we found them very difficult to decipher after a few weeks passed. +- state `DomState | (assert: IPageStateAssert) => void`. A [State](#wait-for-state) object or callback for the assertion to match. +- handlerFn `() => Promise`. An asynchronous function in which you can resolve the page state to handle this issue. 
+ +#### **Returns**: `Promise` + +### tab.flowCommand*(commandFn, exitState?, options?)* {#flow-command} + +A FlowCommand allows you to define a "recovery" boundary in the case where an AwaitedDom error triggers a FlowHandler and modifies your page state. In some cases, you may wish to ensure that a series of commands are re-run instead of a single failing command. For instance, if you lose focus on a modal-window field in the middle of typing, you will want to run the logic that prompted the modal-window to show up. + +FlowCommands can define an `exitState`, which will be tested before moving on. An `exitState` is a [`State`](#wait-for-state) object or callback function defined the same as the parameter to [`waitForState`](#wait-for-state). If your function completes and the `exitState` cannot be satisfied, all FlowHandlers will be triggered and the function will try again (up to the `maxRetries` times provided in options). + +``` + await hero.flowCommand(async () => { + await hero.querySelector('#modalPrompt').$click(); + await hero.querySelector('#field1').$type('text'); + }, assert => { + assert(hero.querySelector('#field1').value, 'text'); <--- if false, 1. Prompt FlowHandlers, 2. Retry Command + }); +``` + + +Flow Commands can be nested within each other. If nested commands cannot be completed due to AwaitedDom errors (interactions, dom errors, dom state timeouts), they will trigger the outer block to be re-tried. + +#### **Arguments**: + +- commandFn `() => Promise`. Your command function containing one or more Hero commands to retry on AwaitedDom errors (after resolving one or more FlowHandlers). Any returned value will be returned to the `tab.flowCommand` call. +- exitState `DomState | (assert: IPageStateAssert) => void`. Optional [State](#wait-for-state) object that must resolve before continuing your script execution. If false, FlowHandlers will be retried to determine if another pass should be made. +- options `object`. 
Optional options to configure this flowCommand + - maxRetries `number`. Default `3`. The number of times this FlowCommand should be retried before throwing an error. + +#### **Returns**: `Promise` + + +### tab.triggerFlowHandlers*()* {#trigger-flow-handler} + +Check the state of all [FlowHandlers](#register-flow-handler) and trigger them to run if they match the current page state. + +#### **Returns**: `Promise` + ### tab.takeScreenshot*(options?)* {#take-screenshot} Takes a screenshot of the current contents rendered in the browser. @@ -406,9 +496,19 @@ NOTE: Null access exceptions are ignored, so you don't need to worry about indiv }); ``` +WaitForState can be optionally shortened to the callback: + +``` +await hero.waitForState(assert => { + assert(hero.url, 'https://dataliberationfoundation.org'); // a value will be tested for equality + assert(hero.isPaintingStable); + assert(hero.document.querySelector('h1').textContent, text => text === "It's Time to Open the Data Economy"); +}); +``` + #### **Arguments**: -- state `object` +- state `object` | `(assert: IPageStateAssert) => void`. A state object or just the callback directly as a shorter option. - name? `string`. Optional name of the state - url? `string` | `Regexp`. Optional url to run this state on (useful for running in a loop) - all `(assert: IPageStateAssert) => void`. A synchronous function that will be true if all assertions evaluate to true. @@ -418,8 +518,16 @@ NOTE: Null access exceptions are ignored, so you don't need to worry about indiv - options `object` Optional - timeoutMs `number`. Timeout in milliseconds. Default `30,000`. -#### **Returns**: `Promise` +#### **Returns**: `Promise` + +### tab.validateState*(state)* {#validate-state} + +Check a [State](#wait-for-state) defined as per `tab.waitForState` above. Instead of waiting, this method will check the state a single time and return a boolean indicating whether the state is valid. 
+#### **Arguments**: +- state `object` | `(assert: IPageStateAssert) => void`. A state object or just the callback directly as a shorter option. + +#### **Returns**: `Promise` ### tab.waitForFileChooser*(options)* {#wait-for-file-chooser} diff --git a/docs/main/Contribute/how-to-contribute.md b/docs/main/Contribute/how-to-contribute.md index e29efa0f6..f553e00b7 100644 --- a/docs/main/Contribute/how-to-contribute.md +++ b/docs/main/Contribute/how-to-contribute.md @@ -1,32 +1,28 @@ # How to contribute -> Hero is an open-source project built by core maintainers and contributors. We want to make it easy for anyone to participate. Contribute to core, build plugins, improve documentation or write a blog post. It all helps Hero on its mission to keep the web open for innovation. -Read the [code of conduct](/docs/contribute/code-of-conduct). +> Hero is an open-source project built by core maintainers and contributors. We want to make it easy for anyone to participate. Contribute to core, build plugins, improve documentation or improve detection evasions. It all helps Hero on its mission to keep the web easily scriptable. + +Read the [code of conduct](./code-of-conduct). ## Contributing to the Code -Hero uses a **monorepo** pattern to manage its dependencies and core plugins. To contribute, you'll probably want to to setup the Hero repository locally. +Hero uses a **monorepo** pattern to manage its dependencies and core plugins. To contribute, you'll probably want to to setup the Hero repository locally. ### Setting Up the Hero Repository -Install [Node.js 8.3](https://nodejs.org/en/download/) or higher and [Yarn](https://yarnpkg.com/lang/en/docs/install/). - -1. Clone the `https://github.com/ulixee/ulixee.git` repository. +Install [Node.js 14](https://nodejs.org/en/download/) or higher and [Yarn](https://yarnpkg.com/lang/en/docs/install/). -To use `@ulixee/cli` in the repo as a global command. Enter the `~/packages/cli` folder and run `npm link`. +1. 
Clone the `https://github.com/ulixee/hero.git` repository. **Yarn** will add dependencies from your test projects to the root `yarn.lock` file. So you should not commit changes in that file unless you have added dependencies to any of the core packages. If you need to commit it, remove your projects from the `~/projects` folder temporary and run `yarn` in the root folder. Yarn will then clean up the lock file with only core dependencies. Commit the file and move your projects back and run `yarn` again to start developing. - ## Contributing to the docs + We are a strong believer that documentation is very important for any open-source projects. Hero uses Gridsome for its website and documentation. 1. If you want to add/modify any Hero documentation, go to the - [docs folder on GitHub](https://github.com/ulixee/ulixee/tree/master/website/docs) and + [docs folder on GitHub](https://github.com/ulixee/hero/tree/main/docs/main) and use the file editor to edit and then preview your changes. 2. GitHub then allows you to commit the change and raise a PR right in the UI. This is the _easiest_ way you can contribute to Hero! -You can also clone [the Hero repo](https://github.com/ulixee/ulixee) and work locally on documentation. - -## Contributing to the blog -*Coming soon...* +You can also clone [the Hero repo](https://github.com/ulixee/hero) and work locally on documentation. diff --git a/docs/main/Help/troubleshooting.md b/docs/main/Help/troubleshooting.md index 59deb3cd0..d90cd1cd5 100644 --- a/docs/main/Help/troubleshooting.md +++ b/docs/main/Help/troubleshooting.md @@ -20,12 +20,6 @@ Browsers will be saved to a shared location on each OS. Each browser version wil - Linux: ~/.cache (environment variable XDG_CACHE_HOME) - Windows: ~/AppData/Local (environment variable LOCALAPPDATA) -#### Replay - -Hero also installs an app called [Replay](/docs/advanced/session-replay) to debug and troubleshoot sessions. Replay is ~200MB unpacked. 
To skip download (ie, in a production environment), you can set the following environmental variable: `HERO_REPLAY_SKIP_BINARY_DOWNLOAD=true`. - -If you continue to have problems, [let us know](https://github.com/ulixee/ulixee/issues). - ### Debugging Logs By default, Hero logs everything to a [Session](/docs/advanced/session) database that is created per Hero instance. The SessionLogs table contains all debug logs. @@ -73,4 +67,4 @@ Logger.injectLogger({ ### Problems after an upgrade -If you have problems after upgrading, [let us know](https://github.com/ulixee/ulixee/issues). +If you have problems after upgrading, [let us know](https://github.com/ulixee/hero/issues). diff --git a/docs/main/Overview/BasicConcepts.md b/docs/main/Overview/BasicConcepts.md index 20ff41ef5..9a58535de 100644 --- a/docs/main/Overview/BasicConcepts.md +++ b/docs/main/Overview/BasicConcepts.md @@ -33,7 +33,7 @@ for (const elem of await elems) { } ``` -Hero's Dynamic DOM allows you to keep all calls within your script context. It also follows the W3C spec to a T. In fact, go ahead and copy lines 3 through 7 and run paste them into your browser's DevTools. They run perfectly. +Hero's Awaited DOM allows you to keep all calls within your script context. It also follows the W3C spec to a T. In fact, go ahead and copy lines 3 through 7 and paste them into your browser's DevTools. They run perfectly. 
### Doing It with Puppeteer @@ -67,8 +67,7 @@ When you're trying to eke out performance, a common technique is to disable rend ```js import Hero from '@ulixee/hero'; -const hero = new Hero(); -await hero.configure({ +const hero = new Hero({ blockedResourceTypes: ['All'], }); await hero.goto('https://ulixee.org'); diff --git a/docs/main/Overview/Configuration.md b/docs/main/Overview/Configuration.md index 365b82008..41a886eed 100644 --- a/docs/main/Overview/Configuration.md +++ b/docs/main/Overview/Configuration.md @@ -2,50 +2,32 @@ Configuration variables can be defined at a few levels: -- `Hero` At an instance level, configured via [hero.configure()](/docs/basic-interfaces/hero#configure) or [new Hero()](/docs/basic-interfaces/hero#constructor), or when creating [Handler](/docs/basic-interfaces/handler) heros using [handler.createHero()](/docs/basic-interfaces/handler#create-hero) or [handler.dispatchHero()](/docs/basic-interfaces/handler#dispatch-hero). +- `Hero` At an instance level, configured via [new Hero()](/docs/basic-interfaces/hero#constructor). - `Connection` At a connection level, which can be configured when creating a new [ConnectionToCore](/docs/advanced/connection-to-core#configuration). - `Core` At an internal level, using the `@ulixee/hero-core` module of Hero. This must be run in the environment where your Browser Engine(s) and `@ulixee/hero-core` module are running. If you're running remote, this will be your server. -The internal `@ulixee/hero-core` module can receive several configuration options on [start](#core-start), or when a [Handler](/docs/basic-interfaces/handler) or [Hero](/docs/basic-interfaces/hero) establishes a [connection](/docs/advanced/connection-to-core). +The internal `@ulixee/hero-core` module can receive several configuration options on [start](#core-start), or when a new [connection](/docs/advanced/connection-to-core) is established. ### Connection To Core
Hero
-The [ConnectionToCore](/docs/advanced/connection-to-core) to be used by a [Handler](/docs/basic-interfaces/handler) or [Hero](/docs/basic-interfaces/hero). +The [ConnectionToCore](/docs/advanced/connection-to-core) to be used by one or more [Hero](/docs/basic-interfaces/hero) instances. All [configurations](/docs/advanced/connection-to-core#configurations) accept both an `options` object and a [`ConnectionToCore`](/docs/advanced/connection-to-core) instance. -Configuration is accepted in the following methods and constructors: +### Max Concurrent Heroes Count
Core
-- [hero.configure()](/docs/basic-interfaces/hero#configure) - apply the connection to the default hero, or to a an hero constructed prior to the first connection. -- [new Hero()](/docs/basic-interfaces/hero#constructor) - the new hero will use this connection. -- [new Handler(...connections)](/docs/basic-interfaces/handler#constructor) - a handler takes one or more coreClientConnection options or instances. - -### Max Concurrent Heros Count
Core
- -Limit concurrent Heros operating at any given time across all [connections](/docs/advanced/connection-to-core) to a "Core". Defaults to `10`. +Limit concurrent Heroes operating at any given time across all [connections](/docs/advanced/connection-to-core) to a "Core". Defaults to `10`. Configurable via [`Core.start()`](#core-start) or [`ConnectionToCore`](/docs/advanced/connection-to-core#configuration). -### Local Proxy Port Start
ConnectionCore
- -Configures the port the Man-In-the-Middle server will listen on locally. This server will correct headers and TLS signatures sent by requests to properly emulate the desired browser engine. Default port is `0`, which will find an open port locally. - -Configurable via [`Core.start()`](#core-start) or the first [`ConnectionToCore`](/docs/advanced/connection-to-core#configuration). - -### Replay Session Port
ConnectionCore
- -Configures the port Replay uses to serve Session data. - -Configurable via [`Core.start()`](#core-start) or the first [`ConnectionToCore`](/docs/advanced/connection-to-core#configuration). - -### Sessions Dir
ConnectionCore
{#sessions-dir} +### Data Dir
ConnectionCore
{#data-dir} Configures the storage location for files created by Core. -- Replay session files +- Session Databases - Man-in-the-middle network certificates -`Environmental variable`: `HERO_SESSIONS_DIR=/your-absolute-dir-path` +`Environmental variable`: `HERO_DATA_DIR=/your-absolute-dir-path` Configurable via [`Core.start()`](#core-start) or the first [`ConnectionToCore`](/docs/advanced/connection-to-core). @@ -77,7 +59,7 @@ A user profile stores and restores Cookies, DOM Storage and IndexedDB records fo const rawProfileJson = fs.readFileSync('profile.json', 'utf-8'); const profile = JSON.parse(rawProfileJson); // { cookies: { sessionId: 'test' }} -hero.configure({ userProfile: profile }); +const hero = new Hero({ userProfile: profile }); const latestUserProfile = await hero.exportUserProfile(); // { cookies, localStorage, sessionStorage, indexedDBs } @@ -88,7 +70,7 @@ const latestUserProfile = await hero.exportUserProfile(); fs.writeFileSync('profile.json', JSON.stringify(latestUserProfile, null, 2)); ``` -### Upstream Proxy
Hero
+### Upstream Proxy Url
Hero
Configures a proxy url to route traffic through for a given Hero. This function supports two types of proxies: @@ -116,9 +98,7 @@ Update existing settings. #### **Arguments**: - options `object` Accepts any of the following: - - maxConcurrentHerosCount `number` defaults to `10`. Limit concurrent Hero sessions running at any given time. - - localProxyPortStart `number` defaults to `any open port`. Starting internal port to use for the mitm proxy. - - sessionsDir `string` defaults to `os.tmpdir()/.ulixee`. Directory to store session files and mitm certificates. - - coreServerPort `number`. Port to run the Core Websocket/Replay server on. + - maxConcurrentClientCount `number` defaults to `10`. Limit concurrent Hero sessions running at any given time. + - dataDir `string` defaults to `os.tmpdir()/.ulixee`. Directory to store session databases and mitm certificates. #### **Returns**: `Promise` diff --git a/docs/main/Overview/Introduction.md b/docs/main/Overview/Introduction.md index c0076d3bd..f97448f23 100644 --- a/docs/main/Overview/Introduction.md +++ b/docs/main/Overview/Introduction.md @@ -23,16 +23,16 @@ Instead of creating another complex puppeteer-like API that requires use of nest To use Hero in your project, install it with npm or yarn: ```bash -npm i --save ulixee +npm i --save @ulixee/hero-fullstack ``` or ```bash -yarn add ulixee +yarn add @ulixee/hero-fullstack ``` -When you install Hero, it also downloads a recent version of Chrome and an app call [Replay](/docs/advanced/session-replay) to debug and troubleshoot sessions. +When you install Hero, it also downloads a recent version of Chrome and data files to emulate headed (visible UI) Chrome on Mac OS and Windows. More details about installation can be found on the [troubleshooting](/docs/help/troubleshooting) page. 
diff --git a/docs/main/Plugins/CorePlugins.md b/docs/main/Plugins/CorePlugins.md index fe4797cab..295f692c2 100644 --- a/docs/main/Plugins/CorePlugins.md +++ b/docs/main/Plugins/CorePlugins.md @@ -76,7 +76,7 @@ The following methods are optional. Add them to your plugin as needed. ### configure(config) -This hook is called during the initialization of a session/browserEmulator as well as every time hero.configure is called from the client. +This hook is called during the initialization of a session/browserEmulator. #### **Arguments**: @@ -85,6 +85,9 @@ This hook is called during the initialization of a session/browserEmulator as we - geolocation `Geolocation`. This is an object containing longtitude and latitude, among other properties. - timezoneId `string`. The configured unicode TimezoneId or host default (eg, America/New_York). - locale `string`. The configured locale in use (eg, en-US). + - upstreamProxyUrl `string`. A socks5 or http proxy url (and optional auth) to use for all HTTP requests in this session. See Hero constructor for details. + - upstreamProxyIpMask `object`. Optional settings to mask the Public IP Address of a host machine when using a proxy. See Hero constructor for details. + - dnsOverTlsProvider `object`. Configure the host and port to use for DNS over TLS. See Hero constructor for details. Modify any value in the object to change it session-wide. diff --git a/docs/main/index.md b/docs/main/index.md index c0076d3bd..f97448f23 100644 --- a/docs/main/index.md +++ b/docs/main/index.md @@ -23,16 +23,16 @@ Instead of creating another complex puppeteer-like API that requires use of nest To use Hero in your project, install it with npm or yarn: ```bash -npm i --save ulixee +npm i --save @ulixee/hero-fullstack ``` or ```bash -yarn add ulixee +yarn add @ulixee/hero-fullstack ``` -When you install Hero, it also downloads a recent version of Chrome and an app call [Replay](/docs/advanced/session-replay) to debug and troubleshoot sessions. 
+When you install Hero, it also downloads a recent version of Chrome and data files to emulate headed (visible UI) Chrome on Mac OS and Windows. More details about installation can be found on the [troubleshooting](/docs/help/troubleshooting) page. diff --git a/docs/main/links.yaml b/docs/main/links.yaml index ae02d8923..174dc3527 100644 --- a/docs/main/links.yaml +++ b/docs/main/links.yaml @@ -8,11 +8,11 @@ - title: Basic Interfaces items: - Hero - - Handler - Tab - FrameEnvironment - Interactions - AwaitedDOM + - DomExtenders #- DetachedDOM - title: Advanced diff --git a/fullstack/test/domExtenders.test.ts b/fullstack/test/domExtenders.test.ts index 14b594333..ac819dab4 100644 --- a/fullstack/test/domExtenders.test.ts +++ b/fullstack/test/domExtenders.test.ts @@ -59,7 +59,7 @@ describe('basic DomExtender tests', () => { ctx.body = ``; }); const hero = await openBrowser(`/domextender-clear`); - await expect(hero.querySelector('#field').$clearValue()).resolves.toBe(undefined); + await expect(hero.querySelector('#field').$clearInputText()).resolves.toBe(undefined); await expect(hero.querySelector('#field').value).resolves.toBe(''); }); }); diff --git a/fullstack/test/flow.test.ts b/fullstack/test/flow.test.ts index 85f1fe1f7..840f068b4 100644 --- a/fullstack/test/flow.test.ts +++ b/fullstack/test/flow.test.ts @@ -229,7 +229,7 @@ describe('flow commands', () => { flowCommandSpy(); const field = await hero.querySelector('#text-field'); await field.$click(); - await field.$clearValue(); + await field.$clearInputText(); const text = await hero.querySelector('h1').textContent; await field.$type(text); }); @@ -307,7 +307,7 @@ describe('flow commands', () => { }, async () => { await hero.querySelector('#text').focus(); - await hero.querySelector('#text').$clearValue(); + await hero.querySelector('#text').$clearInputText(); }, ); @@ -328,6 +328,7 @@ describe('flow commands', () => { await expect(hero.querySelector('#text').value).resolves.toBe('test'); }); + it('can handle 
nested command blocks', async () => { koaServer.get('/flowForm', ctx => { ctx.body = ` @@ -337,14 +338,15 @@ describe('flow commands', () => {
- - + +