diff --git a/maxun-core/package.json b/maxun-core/package.json index faa133ec5..90ee01b77 100644 --- a/maxun-core/package.json +++ b/maxun-core/package.json @@ -1,6 +1,6 @@ { "name": "maxun-core", - "version": "0.0.4", + "version": "0.0.5", "description": "Core package for Maxun, responsible for data extraction", "main": "build/index.js", "typings": "build/index.d.ts", diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 369a08be8..09b6578be 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -283,13 +283,13 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, } else if (attribute === 'innerHTML') { record[label] = fieldElement.innerHTML.trim(); } else if (attribute === 'src') { - // Handle relative 'src' URLs - const src = fieldElement.getAttribute('src'); - record[label] = src ? new URL(src, baseUrl).href : null; + // Handle relative 'src' URLs + const src = fieldElement.getAttribute('src'); + record[label] = src ? new URL(src, window.location.origin).href : null; } else if (attribute === 'href') { // Handle relative 'href' URLs const href = fieldElement.getAttribute('href'); - record[label] = href ? new URL(href, baseUrl).href : null; + record[label] = href ? new URL(href, window.location.origin).href : null; } else { record[label] = fieldElement.getAttribute(attribute); } @@ -346,5 +346,5 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return results; }; - + })(window); \ No newline at end of file diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index a7a5de47e..afea8e476 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -121,6 +121,53 @@ export default class Interpreter extends EventEmitter { } } + // private getSelectors(workflow: Workflow, actionId: number): string[] { + // const selectors: string[] = []; + + // // Validate actionId + // if (actionId <= 0) { + // console.log("No previous selectors to collect."); + // return selectors; // Empty array as there are no previous steps + // } + + // // Iterate from the start up to (but not including) actionId + // for (let index = 0; index < actionId; index++) { + // const currentSelectors = workflow[index]?.where?.selectors; + // console.log(`Selectors at step ${index}:`, currentSelectors); + + // if (currentSelectors && currentSelectors.length > 0) { + // currentSelectors.forEach((selector) => { + // if (!selectors.includes(selector)) { + // selectors.push(selector); // Avoid duplicates + // } + // }); + // } + // } + + // console.log("Collected Selectors:", selectors); + // return selectors; + // } + + private getSelectors(workflow: Workflow): string[] { + const selectorsSet = new Set(); + + if (workflow.length === 0) { + return []; + } + + for (let index = workflow.length - 1; index >= 0; index--) { + const currentSelectors = workflow[index]?.where?.selectors; + + if (currentSelectors && currentSelectors.length > 0) { + currentSelectors.forEach((selector) => selectorsSet.add(selector)); + return Array.from(selectorsSet); + } + } + + return []; + } + + /** * Returns the context object from given Page and the current workflow.\ * \ @@ -130,52 +177,63 @@ export default class Interpreter extends EventEmitter { * @param workflow Current **initialized** workflow (array of where-what pairs). * @returns {PageState} State of the current page. */ - private async getState(page: Page, workflow: Workflow): Promise { + private async getState(page: Page, workflowCopy: Workflow, selectors: string[]): Promise { /** * All the selectors present in the current Workflow */ - const selectors = Preprocessor.extractSelectors(workflow); + // const selectors = Preprocessor.extractSelectors(workflow); + // console.log("Current selectors:", selectors); /** * Determines whether the element targetted by the selector is [actionable](https://playwright.dev/docs/actionability). * @param selector Selector to be queried * @returns True if the targetted element is actionable, false otherwise. */ - const actionable = async (selector: string): Promise => { - try { - const proms = [ - page.isEnabled(selector, { timeout: 500 }), - page.isVisible(selector, { timeout: 500 }), - ]; - - return await Promise.all(proms).then((bools) => bools.every((x) => x)); - } catch (e) { - // log(e, Level.ERROR); - return false; - } - }; + // const actionable = async (selector: string): Promise => { + // try { + // const proms = [ + // page.isEnabled(selector, { timeout: 5000 }), + // page.isVisible(selector, { timeout: 5000 }), + // ]; + + // return await Promise.all(proms).then((bools) => bools.every((x) => x)); + // } catch (e) { + // // log(e, Level.ERROR); + // return false; + // } + // }; /** * Object of selectors present in the current page. */ - const presentSelectors: SelectorArray = await Promise.all( - selectors.map(async (selector) => { - if (await actionable(selector)) { - return [selector]; - } - return []; - }), - ).then((x) => x.flat()); + // const presentSelectors: SelectorArray = await Promise.all( + // selectors.map(async (selector) => { + // if (await actionable(selector)) { + // return [selector]; + // } + // return []; + // }), + // ).then((x) => x.flat()); + + const action = workflowCopy[workflowCopy.length - 1]; + + // console.log("Next action:", action) + + let url: any = page.url(); + + if (action && action.where.url !== url && action.where.url !== "about:blank") { + url = action.where.url; + } return { - url: page.url(), + url, cookies: (await page.context().cookies([page.url()])) .reduce((p, cookie) => ( { ...p, [cookie.name]: cookie.value, }), {}), - selectors: presentSelectors, + selectors, }; } @@ -365,6 +423,7 @@ export default class Interpreter extends EventEmitter { console.log("MERGED results:", mergedResult); await this.options.serializableCallback(mergedResult); + // await this.options.serializableCallback(scrapeResult); }, scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { @@ -410,6 +469,16 @@ export default class Interpreter extends EventEmitter { }), }; + const executeAction = async (invokee: any, methodName: string, args: any) => { + console.log("Executing action:", methodName, args); + if (!args || Array.isArray(args)) { + await (invokee[methodName])(...(args ?? [])); + } else { + await (invokee[methodName])(args); + } + }; + + for (const step of steps) { this.log(`Launching ${String(step.action)}`, Level.LOG); @@ -427,10 +496,20 @@ export default class Interpreter extends EventEmitter { invokee = invokee[level]; } - if (!step.args || Array.isArray(step.args)) { - await (invokee[methodName])(...(step.args ?? [])); + if (methodName === 'waitForLoadState') { + try { + await executeAction(invokee, methodName, step.args); + } catch (error) { + await executeAction(invokee, methodName, 'domcontentloaded'); + } + } else if (methodName === 'click') { + try { + await executeAction(invokee, methodName, step.args); + } catch (error) { + await executeAction(invokee, methodName, [step.args[0], { force: true }]); + } } else { - await (invokee[methodName])(step.args); + await executeAction(invokee, methodName, step.args); } } @@ -475,6 +554,8 @@ export default class Interpreter extends EventEmitter { case 'clickNext': const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + // console.log("Page results:", pageResults); + // Filter out already scraped items const newResults = pageResults.filter(item => { const uniqueKey = JSON.stringify(item); @@ -482,9 +563,9 @@ export default class Interpreter extends EventEmitter { scrapedItems.add(uniqueKey); // Mark as scraped return true; }); - + allResults = allResults.concat(newResults); - + if (config.limit && allResults.length >= config.limit) { return allResults.slice(0, config.limit); } @@ -494,7 +575,7 @@ export default class Interpreter extends EventEmitter { return allResults; // No more pages to scrape } await Promise.all([ - nextButton.click(), + nextButton.dispatchEvent('click'), page.waitForNavigation({ waitUntil: 'networkidle' }) ]); @@ -510,7 +591,7 @@ export default class Interpreter extends EventEmitter { return allResults; } // Click the 'Load More' button to load additional items - await loadMoreButton.click(); + await loadMoreButton.dispatchEvent('click'); await page.waitForTimeout(2000); // Wait for new items to load // After clicking 'Load More', scroll down to load more items await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); @@ -546,11 +627,31 @@ export default class Interpreter extends EventEmitter { return allResults; } + private getMatchingActionId(workflow: Workflow, pageState: PageState, usedActions: string[]) { + for (let actionId = workflow.length - 1; actionId >= 0; actionId--) { + const step = workflow[actionId]; + const isApplicable = this.applicable(step.where, pageState, usedActions); + console.log("-------------------------------------------------------------"); + console.log(`Where:`, step.where); + console.log(`Page state:`, pageState); + console.log(`Match result: ${isApplicable}`); + console.log("-------------------------------------------------------------"); + + if (isApplicable) { + return actionId; + } + } + } + private async runLoop(p: Page, workflow: Workflow) { + const workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow)); + // apply ad-blocker to the current page await this.applyAdBlocker(p); const usedActions: string[] = []; + let selectors: string[] = []; let lastAction = null; + let actionId = -1 let repeatCount = 0; /** @@ -559,7 +660,7 @@ export default class Interpreter extends EventEmitter { * e.g. via `enqueueLinks`. */ p.on('popup', (popup) => { - this.concurrency.addJob(() => this.runLoop(popup, workflow)); + this.concurrency.addJob(() => this.runLoop(popup, workflowCopy)); }); /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */ @@ -578,8 +679,11 @@ export default class Interpreter extends EventEmitter { } let pageState = {}; + let getStateTest = "Hello"; try { - pageState = await this.getState(p, workflow); + pageState = await this.getState(p, workflowCopy, selectors); + selectors = []; + console.log("Empty selectors:", selectors) } catch (e: any) { this.log('The browser has been closed.'); return; @@ -589,32 +693,52 @@ export default class Interpreter extends EventEmitter { this.log(`Current state is: \n${JSON.stringify(pageState, null, 2)}`, Level.WARN); } - const actionId = workflow.findIndex((step) => { - const isApplicable = this.applicable(step.where, pageState, usedActions); - console.log(`Where:`, step.where); - console.log(`Page state:`, pageState); - console.log(`Match result: ${isApplicable}`); - return isApplicable; - }); + // const actionId = workflow.findIndex((step) => { + // const isApplicable = this.applicable(step.where, pageState, usedActions); + // console.log("-------------------------------------------------------------"); + // console.log(`Where:`, step.where); + // console.log(`Page state:`, pageState); + // console.log(`Match result: ${isApplicable}`); + // console.log("-------------------------------------------------------------"); + // return isApplicable; + // }); + + actionId = this.getMatchingActionId(workflowCopy, pageState, usedActions); - const action = workflow[actionId]; + const action = workflowCopy[actionId]; + console.log("MATCHED ACTION:", action); + console.log("MATCHED ACTION ID:", actionId); this.log(`Matched ${JSON.stringify(action?.where)}`, Level.LOG); if (action) { // action is matched if (this.options.debugChannel?.activeId) { this.options.debugChannel.activeId(actionId); } - + repeatCount = action === lastAction ? repeatCount + 1 : 0; - if (this.options.maxRepeats && repeatCount >= this.options.maxRepeats) { + + console.log("REPEAT COUNT", repeatCount); + if (this.options.maxRepeats && repeatCount > this.options.maxRepeats) { return; } lastAction = action; - + try { + console.log("Carrying out:", action.what); await this.carryOutSteps(p, action.what); usedActions.push(action.id ?? 'undefined'); + + workflowCopy.splice(actionId, 1); + console.log(`Action with ID ${action.id} removed from the workflow copy.`); + + // const newSelectors = this.getPreviousSelectors(workflow, actionId); + const newSelectors = this.getSelectors(workflowCopy); + newSelectors.forEach(selector => { + if (!selectors.includes(selector)) { + selectors.push(selector); + } + }); } catch (e) { this.log(e, Level.ERROR); } diff --git a/package.json b/package.json index 7ca27e613..84ed59eaa 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "maxun", - "version": "0.0.3", + "version": "0.0.4", "author": "Maxun", "license": "AGPL-3.0-or-later", "dependencies": { @@ -36,16 +36,20 @@ "fortawesome": "^0.0.1-security", "google-auth-library": "^9.14.1", "googleapis": "^144.0.0", + "i18next": "^24.0.2", "i18next-browser-languagedetector": "^8.0.0", "i18next-http-backend": "^3.0.1", + + "idcac-playwright": "^0.1.3", + "ioredis": "^5.4.1", "joi": "^17.6.0", "jsonwebtoken": "^9.0.2", "jwt-decode": "^4.0.0", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", - "maxun-core": "0.0.4", + "maxun-core": "^0.0.5", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", diff --git a/server/src/api/record.ts b/server/src/api/record.ts index 5b33b12f6..05560487d 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -15,6 +15,8 @@ import { io, Socket } from "socket.io-client"; import { BinaryOutputService } from "../storage/mino"; import { AuthenticatedRequest } from "../routes/record" import {capture} from "../utils/analytics"; +import { Page } from "playwright"; +import { WorkflowFile } from "maxun-core"; chromium.use(stealthPlugin()); const formatRecording = (recordingData: any) => { @@ -533,6 +535,17 @@ function resetRecordingState(browserId: string, id: string) { id = ''; } +function AddGeneratedFlags(workflow: WorkflowFile) { + const copy = JSON.parse(JSON.stringify(workflow)); + for (let i = 0; i < workflow.workflow.length; i++) { + copy.workflow[i].what.unshift({ + action: 'flag', + args: ['generated'], + }); + } + return copy; +}; + async function executeRun(id: string) { try { const run = await Run.findOne({ where: { runId: id } }); @@ -560,13 +573,14 @@ async function executeRun(id: string) { throw new Error('Could not access browser'); } - const currentPage = await browser.getCurrentPage(); + let currentPage = await browser.getCurrentPage(); if (!currentPage) { throw new Error('Could not create a new page'); } + const workflow = AddGeneratedFlags(recording.recording); const interpretationInfo = await browser.interpreter.InterpretRecording( - recording.recording, currentPage, plainRun.interpreterSettings + workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings ); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 769787da7..417e8964d 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -15,6 +15,7 @@ import { InterpreterSettings, RemoteBrowserOptions } from "../../types"; import { WorkflowGenerator } from "../../workflow-management/classes/Generator"; import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter"; import { getDecryptedProxyConfig } from '../../routes/proxy'; +import { getInjectableScript } from 'idcac-playwright'; chromium.use(stealthPlugin()); @@ -65,6 +66,8 @@ export class RemoteBrowser { maxRepeats: 1, }; + private lastEmittedUrl: string | null = null; + /** * {@link WorkflowGenerator} instance specific to the remote browser. */ @@ -87,6 +90,64 @@ export class RemoteBrowser { this.generator = new WorkflowGenerator(socket); } + /** + * Normalizes URLs to prevent navigation loops while maintaining consistent format + */ + private normalizeUrl(url: string): string { + try { + const parsedUrl = new URL(url); + // Remove trailing slashes except for root path + parsedUrl.pathname = parsedUrl.pathname.replace(/\/+$/, '') || '/'; + // Ensure consistent protocol handling + parsedUrl.protocol = parsedUrl.protocol.toLowerCase(); + return parsedUrl.toString(); + } catch { + return url; + } + } + + /** + * Determines if a URL change is significant enough to emit + */ + private shouldEmitUrlChange(newUrl: string): boolean { + if (!this.lastEmittedUrl) { + return true; + } + const normalizedNew = this.normalizeUrl(newUrl); + const normalizedLast = this.normalizeUrl(this.lastEmittedUrl); + return normalizedNew !== normalizedLast; + } + + private async setupPageEventListeners(page: Page) { + page.on('framenavigated', async (frame) => { + if (frame === page.mainFrame()) { + const currentUrl = page.url(); + if (this.shouldEmitUrlChange(currentUrl)) { + this.lastEmittedUrl = currentUrl; + this.socket.emit('urlChanged', currentUrl); + } + } + }); + + // Handle page load events with retry mechanism + page.on('load', async () => { + const injectScript = async (): Promise => { + try { + await page.waitForLoadState('networkidle', { timeout: 5000 }); + + await page.evaluate(getInjectableScript()); + return true; + } catch (error: any) { + logger.log('warn', `Script injection attempt failed: ${error.message}`); + return false; + } + }; + + const success = await injectScript(); + console.log("Script injection result:", success); + }); + } + /** * An asynchronous constructor for asynchronously initialized properties. * Must be called right after creating an instance of RemoteBrowser class. @@ -166,11 +227,7 @@ export class RemoteBrowser { this.context = await this.browser.newContext(contextOptions); this.currentPage = await this.context.newPage(); - this.currentPage.on('framenavigated', (frame) => { - if (frame === this.currentPage?.mainFrame()) { - this.socket.emit('urlChanged', this.currentPage.url()); - } - }); + await this.setupPageEventListeners(this.currentPage); // await this.currentPage.setExtraHTTPHeaders({ // 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3' @@ -370,11 +427,7 @@ export class RemoteBrowser { await this.stopScreencast(); this.currentPage = page; - this.currentPage.on('framenavigated', (frame) => { - if (frame === this.currentPage?.mainFrame()) { - this.socket.emit('urlChanged', this.currentPage.url()); - } - }); + await this.setupPageEventListeners(this.currentPage); //await this.currentPage.setViewportSize({ height: 400, width: 900 }) this.client = await this.currentPage.context().newCDPSession(this.currentPage); @@ -402,14 +455,8 @@ export class RemoteBrowser { await this.currentPage?.close(); this.currentPage = newPage; if (this.currentPage) { - this.currentPage.on('framenavigated', (frame) => { - if (frame === this.currentPage?.mainFrame()) { - this.socket.emit('urlChanged', this.currentPage.url()); - } - }); - // this.currentPage.on('load', (page) => { - // this.socket.emit('urlChanged', page.url()); - // }) + await this.setupPageEventListeners(this.currentPage); + this.client = await this.currentPage.context().newCDPSession(this.currentPage); await this.subscribeToScreencast(); } else { diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index d1f648f86..ddadf240e 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -18,6 +18,8 @@ import { AuthenticatedRequest } from './record'; import { computeNextRun } from '../utils/schedule'; import { capture } from "../utils/analytics"; import { tryCatch } from 'bullmq'; +import { WorkflowFile } from 'maxun-core'; +import { Page } from 'playwright'; chromium.use(stealthPlugin()); export const router = Router(); @@ -422,6 +424,17 @@ router.get('/runs/run/:id', requireSignIn, async (req, res) => { } }); +function AddGeneratedFlags(workflow: WorkflowFile) { + const copy = JSON.parse(JSON.stringify(workflow)); + for (let i = 0; i < workflow.workflow.length; i++) { + copy.workflow[i].what.unshift({ + action: 'flag', + args: ['generated'], + }); + } + return copy; +}; + /** * PUT endpoint for finishing a run and saving it to the storage. */ @@ -443,10 +456,11 @@ router.post('/runs/run/:id', requireSignIn, async (req: AuthenticatedRequest, re // interpret the run in active browser const browser = browserPool.getRemoteBrowser(plainRun.browserId); - const currentPage = browser?.getCurrentPage(); + let currentPage = browser?.getCurrentPage(); if (browser && currentPage) { + const workflow = AddGeneratedFlags(recording.recording); const interpretationInfo = await browser.interpreter.InterpretRecording( - recording.recording, currentPage, plainRun.interpreterSettings); + workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); await destroyRemoteBrowser(plainRun.browserId); diff --git a/server/src/workflow-management/classes/Interpreter.ts b/server/src/workflow-management/classes/Interpreter.ts index d53259b7d..b982b172a 100644 --- a/server/src/workflow-management/classes/Interpreter.ts +++ b/server/src/workflow-management/classes/Interpreter.ts @@ -244,7 +244,12 @@ export class WorkflowInterpreter { * @param page The page instance used to interact with the browser. * @param settings The settings to use for the interpretation. */ - public InterpretRecording = async (workflow: WorkflowFile, page: Page, settings: InterpreterSettings) => { + public InterpretRecording = async ( + workflow: WorkflowFile, + page: Page, + updatePageOnPause: (page: Page) => void, + settings: InterpreterSettings + ) => { const params = settings.params ? settings.params : null; delete settings.params; @@ -262,7 +267,7 @@ export class WorkflowInterpreter { this.socket.emit('debugMessage', msg) }, }, - serializableCallback: (data: string) => { + serializableCallback: (data: any) => { this.serializableData.push(data); this.socket.emit('serializableCallback', data); }, @@ -275,6 +280,23 @@ export class WorkflowInterpreter { const interpreter = new Interpreter(decryptedWorkflow, options); this.interpreter = interpreter; + interpreter.on('flag', async (page, resume) => { + if (this.activeId !== null && this.breakpoints[this.activeId]) { + logger.log('debug', `breakpoint hit id: ${this.activeId}`); + this.socket.emit('breakpointHit'); + this.interpretationIsPaused = true; + } + + if (this.interpretationIsPaused) { + this.interpretationResume = resume; + logger.log('debug', `Paused inside of flag: ${page.url()}`); + updatePageOnPause(page); + this.socket.emit('log', '----- The interpretation has been paused -----', false); + } else { + resume(); + } + }); + const status = await interpreter.run(page, params); const lastArray = this.serializableData.length > 1 diff --git a/server/src/workflow-management/scheduler/index.ts b/server/src/workflow-management/scheduler/index.ts index 02ca905fc..169b0061c 100644 --- a/server/src/workflow-management/scheduler/index.ts +++ b/server/src/workflow-management/scheduler/index.ts @@ -11,6 +11,8 @@ import Run from "../../models/Run"; import { getDecryptedProxyConfig } from "../../routes/proxy"; import { BinaryOutputService } from "../../storage/mino"; import { capture } from "../../utils/analytics"; +import { WorkflowFile } from "maxun-core"; +import { Page } from "playwright"; chromium.use(stealthPlugin()); async function createWorkflowAndStoreMetadata(id: string, userId: string) { @@ -79,6 +81,17 @@ async function createWorkflowAndStoreMetadata(id: string, userId: string) { } } +function AddGeneratedFlags(workflow: WorkflowFile) { + const copy = JSON.parse(JSON.stringify(workflow)); + for (let i = 0; i < workflow.workflow.length; i++) { + copy.workflow[i].what.unshift({ + action: 'flag', + args: ['generated'], + }); + } + return copy; +}; + async function executeRun(id: string) { try { const run = await Run.findOne({ where: { runId: id } }); @@ -106,13 +119,15 @@ async function executeRun(id: string) { throw new Error('Could not access browser'); } - const currentPage = await browser.getCurrentPage(); + let currentPage = await browser.getCurrentPage(); if (!currentPage) { throw new Error('Could not create a new page'); } + const workflow = AddGeneratedFlags(recording.recording); const interpretationInfo = await browser.interpreter.InterpretRecording( - recording.recording, currentPage, plainRun.interpreterSettings); + workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings + ); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 193de8910..917ac561c 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -20,49 +20,6 @@ type Workflow = WorkflowFile["workflow"]; * @category WorkflowManagement-Selectors * @returns {Promise} */ -export const getRect = async (page: Page, coordinates: Coordinates) => { - try { - const rect = await page.evaluate( - async ({ x, y }) => { - const el = document.elementFromPoint(x, y) as HTMLElement; - if (el) { - const { parentElement } = el; - // Match the logic in recorder.ts for link clicks - const element = parentElement?.tagName === 'A' ? parentElement : el; - const rectangle = element?.getBoundingClientRect(); - // @ts-ignore - if (rectangle) { - return { - x: rectangle.x, - y: rectangle.y, - width: rectangle.width, - height: rectangle.height, - top: rectangle.top, - right: rectangle.right, - bottom: rectangle.bottom, - left: rectangle.left, - }; - } - } - }, - { x: coordinates.x, y: coordinates.y }, - ); - return rect; - } catch (error) { - const { message, stack } = error as Error; - logger.log('error', `Error while retrieving selector: ${message}`); - logger.log('error', `Stack: ${stack}`); - } -} - -/** - * Checks the basic info about an element and returns a {@link BaseActionInfo} object. - * If the element is not found, returns undefined. - * @param page The page instance. - * @param coordinates Coordinates of an element. - * @category WorkflowManagement-Selectors - * @returns {Promise} - */ export const getElementInformation = async ( page: Page, coordinates: Coordinates @@ -70,10 +27,50 @@ export const getElementInformation = async ( try { const elementInfo = await page.evaluate( async ({ x, y }) => { - const el = document.elementFromPoint(x, y) as HTMLElement; - if (el) { - const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; + const originalEl = document.elementFromPoint(x, y) as HTMLElement; + if (originalEl) { + let element = originalEl; + + // if (originalEl.tagName === 'A') { + // element = originalEl; + // } else if (originalEl.parentElement?.tagName === 'A') { + // element = originalEl.parentElement; + // } else { + // Generic parent finding logic based on visual containment + const containerTags = ['DIV', 'SECTION', 'ARTICLE', 'MAIN', 'HEADER', 'FOOTER', 'NAV', 'ASIDE', + 'ADDRESS', 'BLOCKQUOTE', 'DETAILS', 'DIALOG', 'FIGURE', 'FIGCAPTION', 'MAIN', 'MARK', 'SUMMARY', 'TIME', + 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TR', 'TH', 'TD', 'CAPTION', 'COLGROUP', 'COL', 'FORM', 'FIELDSET', + 'LEGEND', 'LABEL', 'INPUT', 'BUTTON', 'SELECT', 'DATALIST', 'OPTGROUP', 'OPTION', 'TEXTAREA', 'OUTPUT', + 'PROGRESS', 'METER', 'DETAILS', 'SUMMARY', 'MENU', 'MENUITEM', 'MENUITEM', 'APPLET', 'EMBED', 'OBJECT', + 'PARAM', 'VIDEO', 'AUDIO', 'SOURCE', 'TRACK', 'CANVAS', 'MAP', 'AREA', 'SVG', 'IFRAME', 'FRAME', 'FRAMESET', + 'LI', 'UL', 'OL', 'DL', 'DT', 'DD', 'HR', 'P', 'PRE', 'LISTING', 'PLAINTEXT', 'A' + ]; + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + if (!containerTags.includes(element.parentElement.tagName)) { + break; + } + + // Check if parent visually contains the child + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + // Additional checks for more comprehensive containment + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + // } + } } let info: { tagName: string; @@ -98,7 +95,7 @@ export const getElementInformation = async ( ); } - // Gather specific information based on the tag + // Existing tag-specific logic if (element?.tagName === 'A') { info.url = (element as HTMLAnchorElement).href; info.innerText = element.innerText ?? ''; @@ -112,7 +109,6 @@ export const getElementInformation = async ( info.innerHTML = element.innerHTML; info.outerHTML = element.outerHTML; - return info; } return null; @@ -127,6 +123,80 @@ export const getElementInformation = async ( } }; +export const getRect = async (page: Page, coordinates: Coordinates) => { + try { + const rect = await page.evaluate( + async ({ x, y }) => { + const originalEl = document.elementFromPoint(x, y) as HTMLElement; + if (originalEl) { + let element = originalEl; + + // if (originalEl.tagName === 'A') { + // element = originalEl; + // } else if (originalEl.parentElement?.tagName === 'A') { + // element = originalEl.parentElement; + // } else { + const containerTags = ['DIV', 'SECTION', 'ARTICLE', 'MAIN', 'HEADER', 'FOOTER', 'NAV', 'ASIDE', + 'ADDRESS', 'BLOCKQUOTE', 'DETAILS', 'DIALOG', 'FIGURE', 'FIGCAPTION', 'MAIN', 'MARK', 'SUMMARY', 'TIME', + 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TR', 'TH', 'TD', 'CAPTION', 'COLGROUP', 'COL', 'FORM', 'FIELDSET', + 'LEGEND', 'LABEL', 'INPUT', 'BUTTON', 'SELECT', 'DATALIST', 'OPTGROUP', 'OPTION', 'TEXTAREA', 'OUTPUT', + 'PROGRESS', 'METER', 'DETAILS', 'SUMMARY', 'MENU', 'MENUITEM', 'MENUITEM', 'APPLET', 'EMBED', 'OBJECT', + 'PARAM', 'VIDEO', 'AUDIO', 'SOURCE', 'TRACK', 'CANVAS', 'MAP', 'AREA', 'SVG', 'IFRAME', 'FRAME', 'FRAMESET', + 'LI', 'UL', 'OL', 'DL', 'DT', 'DD', 'HR', 'P', 'PRE', 'LISTING', 'PLAINTEXT', 'A' + ]; + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + if (!containerTags.includes(element.parentElement.tagName)) { + break; + } + + + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + // } + }} + + //element = element?.parentElement?.tagName === 'A' ? element?.parentElement : element; + const rectangle = element?.getBoundingClientRect(); + + if (rectangle) { + return { + x: rectangle.x, + y: rectangle.y, + width: rectangle.width, + height: rectangle.height, + top: rectangle.top, + right: rectangle.right, + bottom: rectangle.bottom, + left: rectangle.left, + }; + } + } + }, + { x: coordinates.x, y: coordinates.y }, + ); + return rect; + } catch (error) { + const { message, stack } = error as Error; + logger.log('error', `Error while retrieving selector: ${message}`); + logger.log('error', `Stack: ${stack}`); + } +} + /** * Returns the best and unique css {@link Selectors} for the element on the page. @@ -742,7 +812,6 @@ interface SelectorResult { export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates): Promise => { try { const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { - function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); @@ -774,8 +843,44 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates return path.join(' > '); } - const element = document.elementFromPoint(x, y) as HTMLElement | null; - if (!element) return null; + const originalEl = document.elementFromPoint(x, y) as HTMLElement; + if (!originalEl) return null; + + let element = originalEl; + + const containerTags = ['DIV', 'SECTION', 'ARTICLE', 'MAIN', 'HEADER', 'FOOTER', 'NAV', 'ASIDE', + 'ADDRESS', 'BLOCKQUOTE', 'DETAILS', 'DIALOG', 'FIGURE', 'FIGCAPTION', 'MAIN', 'MARK', 'SUMMARY', 'TIME', + 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TR', 'TH', 'TD', 'CAPTION', 'COLGROUP', 'COL', 'FORM', 'FIELDSET', + 'LEGEND', 'LABEL', 'INPUT', 'BUTTON', 'SELECT', 'DATALIST', 'OPTGROUP', 'OPTION', 'TEXTAREA', 'OUTPUT', + 'PROGRESS', 'METER', 'DETAILS', 'SUMMARY', 'MENU', 'MENUITEM', 'MENUITEM', 'APPLET', 'EMBED', 'OBJECT', + 'PARAM', 'VIDEO', 'AUDIO', 'SOURCE', 'TRACK', 'CANVAS', 'MAP', 'AREA', 'SVG', 'IFRAME', 'FRAME', 'FRAMESET', + 'LI', 'UL', 'OL', 'DL', 'DT', 'DD', 'HR', 'P', 'PRE', 'LISTING', 'PLAINTEXT', 'A' + ]; + + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + if (!containerTags.includes(element.parentElement.tagName)) { + break; + } + + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + } + } const generalSelector = getSelectorPath(element); return { @@ -790,7 +895,6 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates } }; - export const getChildSelectors = async (page: Page, parentSelector: string): Promise => { try { const childSelectors = await page.evaluate((parentSelector: string) => { diff --git a/src/components/molecules/IntegrationSettings.tsx b/src/components/molecules/IntegrationSettings.tsx index c31605dea..b34bc0e92 100644 --- a/src/components/molecules/IntegrationSettings.tsx +++ b/src/components/molecules/IntegrationSettings.tsx @@ -15,11 +15,13 @@ import { useGlobalInfoStore } from "../../context/globalInfo"; import { getStoredRecording } from "../../api/storage"; import { apiUrl } from "../../apiConfig.js"; import Cookies from 'js-cookie'; + interface IntegrationProps { isOpen: boolean; handleStart: (data: IntegrationSettings) => void; handleClose: () => void; } + export interface IntegrationSettings { spreadsheetId: string; spreadsheetName: string; @@ -75,8 +77,7 @@ export const IntegrationSettingsModal = ({ ); notify( "error", - `Error fetching spreadsheet files: ${ - error.response?.data?.message || error.message + `Error fetching spreadsheet files: ${error.response?.data?.message || error.message }` ); } diff --git a/src/components/molecules/NavBar.tsx b/src/components/molecules/NavBar.tsx index dc161ab22..ead395c54 100644 --- a/src/components/molecules/NavBar.tsx +++ b/src/components/molecules/NavBar.tsx @@ -1,17 +1,32 @@ -import React, { useState, useContext } from "react"; -import axios from "axios"; + + + + + + + + + + + + +import { useTranslation } from "react-i18next"; // Import useTranslation hook + +import React, { useState, useContext, useEffect } from 'react'; +import axios from 'axios'; import styled from "styled-components"; import { stopRecording } from "../../api/recording"; import { useGlobalInfoStore } from "../../context/globalInfo"; -import { IconButton, Menu, MenuItem, Typography, Chip } from "@mui/material"; -import { AccountCircle, Logout, Clear, Language } from "@mui/icons-material"; -import { useNavigate } from "react-router-dom"; -import { AuthContext } from "../../context/auth"; -import { SaveRecording } from "../molecules/SaveRecording"; -import DiscordIcon from "../atoms/DiscordIcon"; -import { apiUrl } from "../../apiConfig"; +import { IconButton, Menu, MenuItem, Typography, Chip, Button, Modal, Tabs, Tab, Box, Snackbar } from "@mui/material"; +import { AccountCircle, Logout, Clear, YouTube, X, Update, Close,Language } from "@mui/icons-material"; +import { useNavigate } from 'react-router-dom'; +import { AuthContext } from '../../context/auth'; +import { SaveRecording } from '../molecules/SaveRecording'; +import DiscordIcon from '../atoms/DiscordIcon'; +import { apiUrl } from '../../apiConfig'; import MaxunLogo from "../../assets/maxunlogo.png"; -import { useTranslation } from "react-i18next"; // Import useTranslation hook +import packageJson from "../../../package.json" + interface NavBarProps { recordingName: string; @@ -29,8 +44,43 @@ export const NavBar: React.FC = ({ const { t, i18n } = useTranslation(); // Get translation function and i18n methods const [anchorEl, setAnchorEl] = useState(null); + const [langAnchorEl, setLangAnchorEl] = useState(null); + const currentVersion = packageJson.version; + + const [open, setOpen] = useState(false); + const [latestVersion, setLatestVersion] = useState(null); + const [tab, setTab] = useState(0); + const [isUpdateAvailable, setIsUpdateAvailable] = useState(false); + + const fetchLatestVersion = async (): Promise => { + try { + const response = await fetch("https://api.github.com/repos/getmaxun/maxun/releases/latest"); + const data = await response.json(); + const version = data.tag_name.replace(/^v/, ""); // Remove 'v' prefix + return version; + } catch (error) { + console.error("Failed to fetch latest version:", error); + return null; // Handle errors gracefully + } + }; + + const handleUpdateOpen = () => { + setOpen(true); + fetchLatestVersion(); + }; + + const handleUpdateClose = () => { + setOpen(false); + setTab(0); // Reset tab to the first tab + }; + + const handleUpdateTabChange = (event: React.SyntheticEvent, newValue: number) => { + setTab(newValue); + }; + + const handleMenuOpen = (event: React.MouseEvent) => { setAnchorEl(event.currentTarget); }; @@ -66,7 +116,19 @@ export const NavBar: React.FC = ({ localStorage.setItem("language", lang); // Persist language to localStorage }; + useEffect(() => { + const checkForUpdates = async () => { + const latestVersion = await fetchLatestVersion(); + setLatestVersion(latestVersion); // Set the latest version state + if (latestVersion && latestVersion !== currentVersion) { + setIsUpdateAvailable(true); // Show a notification or highlight the "Upgrade" button + } + }; + checkForUpdates(); + }, []); + return ( +
= ({ + + ); }; diff --git a/src/components/molecules/Pair.tsx b/src/components/molecules/Pair.tsx index b05b912de..3c332600e 100644 --- a/src/components/molecules/Pair.tsx +++ b/src/components/molecules/Pair.tsx @@ -1,5 +1,5 @@ import React, { FC, useState } from 'react'; -import { Stack, Button, IconButton, Tooltip, Chip, Badge } from "@mui/material"; +import { Stack, Button, IconButton, Tooltip, Badge } from "@mui/material"; import { AddPair, deletePair, UpdatePair } from "../../api/workflow"; import { WorkflowFile } from "maxun-core"; import { ClearButton } from "../atoms/buttons/ClearButton"; diff --git a/src/components/molecules/RobotDuplicate.tsx b/src/components/molecules/RobotDuplicate.tsx index 850614b0f..38b7b4224 100644 --- a/src/components/molecules/RobotDuplicate.tsx +++ b/src/components/molecules/RobotDuplicate.tsx @@ -1,6 +1,6 @@ import React, { useState, useEffect } from 'react'; import { GenericModal } from "../atoms/GenericModal"; -import { TextField, Typography, Box, Button, Chip } from "@mui/material"; +import { TextField, Typography, Box, Button } from "@mui/material"; import { modalStyle } from "./AddWhereCondModal"; import { useGlobalInfoStore } from '../../context/globalInfo'; import { duplicateRecording, getStoredRecording } from '../../api/storage'; diff --git a/src/context/browserActions.tsx b/src/context/browserActions.tsx index ef303f822..55ca1b371 100644 --- a/src/context/browserActions.tsx +++ b/src/context/browserActions.tsx @@ -53,6 +53,7 @@ export const ActionProvider = ({ children }: { children: ReactNode }) => { const startPaginationMode = () => { setPaginationMode(true); setCaptureStage('pagination'); + socket?.emit('setGetList', { getList: false }); }; const stopPaginationMode = () => setPaginationMode(false); @@ -75,7 +76,6 @@ export const ActionProvider = ({ children }: { children: ReactNode }) => { const stopGetList = () => { setGetList(false); - socket?.emit('setGetList', { getList: false }); setPaginationType(''); setLimitType(''); setCustomLimit('');