Skip to content

Commit

Permalink
New: Allow limiting scraping concurrency
Browse files Browse the repository at this point in the history
  • Loading branch information
brafdlog committed Oct 21, 2022
1 parent c03392d commit f349be5
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 2 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"@sentry/electron": "^2.5.0",
"@vue/composition-api": "^1.0.0-rc.6",
"analytics-node": "^5.1.0",
"bottleneck": "^2.19.5",
"core-js": "^3.9.1",
"csv-parse": "^4.14.1",
"csv-stringify": "^5.6.2",
Expand Down
1 change: 1 addition & 0 deletions src/backend/commonTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export interface Config {
showBrowser: boolean;
accountsToScrape: AccountToScrapeConfig[];
chromiumPath?: string;
maxConcurrency?: number;
},
useReactUI?: boolean
}
Expand Down
7 changes: 6 additions & 1 deletion src/backend/import/importTransactions.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Transaction } from 'israeli-bank-scrapers-core/lib/transactions';
import Bottleneck from 'bottleneck';
import _ from 'lodash';
import moment from 'moment';
import { configFilePath, userDataPath } from '@/app-globals';
Expand Down Expand Up @@ -27,11 +28,15 @@ export async function scrapeFinancialAccountsAndFetchTransactions(scrapingConfig
} else {
chromiumPath = await getChrome(userDataPath, (percent) => emitChromeDownload(eventPublisher, percent));
}

const limiter = new Bottleneck({
maxConcurrent: scrapingConfig.maxConcurrency
});
const scrapePromises = scrapingConfig.accountsToScrape
.filter((accountToScrape) => accountToScrape.active !== false)
.map(async (accountToScrape) => ({
id: accountToScrape.id,
transactions: await fetchTransactions(accountToScrape, startDate, scrapingConfig.showBrowser, eventPublisher, chromiumPath)
transactions: await limiter.schedule(() => fetchTransactions(accountToScrape, startDate, scrapingConfig.showBrowser, eventPublisher, chromiumPath))
}));

const promiseResults = await Promise.allSettled(scrapePromises);
Expand Down
6 changes: 6 additions & 0 deletions ui-react/src/Store.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,12 @@ export default class Store {
await updateConfig(this.config);
}

/**
 * Persists the maximum number of accounts that may be scraped in parallel.
 *
 * Accepts either a number or the raw text of the settings input, because the
 * settings form forwards `event.target.value` (a string) directly.
 *
 * Only positive integers are stored. Anything else (empty text, `0`,
 * negative or fractional numbers, non-numeric text) clears the setting by
 * storing `undefined`, so a typo in the form can never persist an unusable
 * limit.
 * NOTE(review): the backend hands this value to Bottleneck's `maxConcurrent`;
 * an unset value is presumed to mean "no limit" there — confirm against the
 * Bottleneck documentation.
 *
 * @param maxConcurrency Desired limit, as a number or as user-entered text.
 */
async setMaxConcurrency(maxConcurrency?: number | string) {
  this.verifyConfigDefined();

  // Form inputs deliver strings; normalise to a number before validating.
  const parsed = typeof maxConcurrency === 'string'
    ? Number(maxConcurrency.trim())
    : maxConcurrency;

  // Number('') is 0 and Number('abc') is NaN; both fail this check.
  const isValidLimit = typeof parsed === 'number' && Number.isInteger(parsed) && parsed > 0;

  this.config.scraping.maxConcurrency = isValidLimit ? parsed : undefined;
  await updateConfig(this.config);
}

async setChromiumPath(chromiumPath?: string) {
this.verifyConfigDefined();
this.config.scraping.chromiumPath = chromiumPath;
Expand Down
4 changes: 4 additions & 0 deletions ui-react/src/components/GeneralSettings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ function GeneralSettings() {
<Form.Label>כמה ימים אחורה לחפש?</Form.Label>
<Form.Control className={styles.input} defaultValue={store.config?.scraping.numDaysBack} onBlur={(event) => store.setNumDaysBack(event.target.value)}/>
</Form.Group>
<Form.Group>
<Form.Label>כמה חשבונות לשלוף במקביל?</Form.Label>
<Form.Control className={styles.input} defaultValue={store.config?.scraping.maxConcurrency} onBlur={(event) => store.setMaxConcurrency(event.target.value)}/>
</Form.Group>
<Form.Group>
<Form.Label>Chromium path</Form.Label>
<Form.Control className={styles.input} defaultValue={store.config?.scraping.chromiumPath} onBlur={(event) => store.setChromiumPath(event.target.value)}/>
Expand Down
1 change: 1 addition & 0 deletions ui-react/src/types.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export interface Config {
showBrowser: boolean;
accountsToScrape: AccountToScrapeConfig[];
chromiumPath?: string;
maxConcurrency?: number;
};
}

Expand Down
2 changes: 1 addition & 1 deletion yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4237,7 +4237,7 @@ boolean@^3.0.1:
resolved "https://registry.yarnpkg.com/boolean/-/boolean-3.1.2.tgz#e30f210a26b02458482a8cc353ab06f262a780c2"
integrity sha512-YN6UmV0FfLlBVvRvNPx3pz5W/mUoYB24J4WSXOKP/OOJpi+Oq6WYqPaNTHzjI0QzwWtnvEd5CGYyQPgp1jFxnw==

bottleneck@^2.18.1:
bottleneck@^2.18.1, bottleneck@^2.19.5:
version "2.19.5"
resolved "https://registry.yarnpkg.com/bottleneck/-/bottleneck-2.19.5.tgz#5df0b90f59fd47656ebe63c78a98419205cadd91"
integrity sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw==
Expand Down

0 comments on commit f349be5

Please sign in to comment.