Skip to content

Commit

Permalink
Replacing script.src with empty string if it contains load.php
Browse files Browse the repository at this point in the history
Added test for getting articles from vikidia.org
  • Loading branch information
pavel-karatsiuba committed Feb 16, 2023
1 parent 5b65baa commit 0431b73
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/util/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ export const WEBP_CANDIDATE_IMAGE_MIME_TYPE = /image+[/]+(jpeg|png)/
// Path segment 'wiki/'; presumably the default article path of a wiki — usage is not visible in this hunk.
export const DEFAULT_WIKI_PATH = 'wiki/'
// Matches the literal text `function allReady( modules ) {` (spacing included).
export const ALL_READY_FUNCTION = /function allReady\( modules \) {/
// Matches the literal call `mw.requestIdleCallback( doPropagation, { timeout: 1 } );`.
export const DO_PROPAGATION = /mw\.requestIdleCallback\( doPropagation, \{ timeout: 1 \} \);/
// NOTE(review): LOAD_PHP is declared twice below. This looks like the removed and the added
// side of a diff hunk shown together; only one declaration can remain in a real module — confirm.
// Variant 1: matches a `script.src = "...";` assignment for one specific en.vikidia.org load.php URL
// (fixed query string and version). The dots in `script.src`, `en.vikidia.org` and `load.php`
// are unescaped, so each matches any character.
export const LOAD_PHP = /script.src = "\/\/en.vikidia.org\/w\/load.php\?debug=true&lang=en&modules=jquery%2Cmediawiki&only=scripts&skin=vector&version=09k3x4y";/
// Variant 2: matches any `script.src = "...";` assignment whose quoted value contains `load.php`.
// `.*` is greedy and the dot in `script.src` is unescaped.
export const LOAD_PHP = /script.src = ".*load\.php.*";/
// Raw GitHub gist URL (includes a revision hash) pointing at `web-handler.js`.
export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js'
// Presumably the maximum number of retry attempts for a file download — usage not visible here; verify.
export const MAX_FILE_DOWNLOAD_RETRIES = 5
2 changes: 1 addition & 1 deletion src/util/dump.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, mw: MediaWik
// it also removes requestIdleCallback as in our case window is idle after all script tags are called but those script tags
// will require the functions which would have been loaded by doPropagation.
function hackStartUpModule(jsCode: string) {
if ((!ALL_READY_FUNCTION.test(jsCode) || !DO_PROPAGATION.test(jsCode)) && !LOAD_PHP.test(jsCode)) {
if ((!ALL_READY_FUNCTION.test(jsCode) || !DO_PROPAGATION.test(jsCode)) && !LOAD_PHP.test(jsCode)) {
throw new Error('unable to hack startup module')
}

Expand Down
43 changes: 43 additions & 0 deletions test/e2e/vikidia.e2e.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import * as mwoffliner from '../../src/mwoffliner.lib.js'
import { execa } from 'execa'
import rimraf from 'rimraf'
import { zimcheckAvailable, zimcheck } from '../util.js'
import 'dotenv/config.js'
import { jest } from '@jest/globals'

// This suite performs a real scrape over the network, so raise the Jest timeout to 200 s.
jest.setTimeout(200000)

/**
 * End-to-end check that mwoffliner can scrape a single article ('Alaska')
 * from https://en.vikidia.org and produce exactly one output file.
 */
describe('vikidia', () => {
  // Millisecond timestamp keeps the output directory name unique per run.
  const now = new Date()
  const testId = `mwo-test-${+now}`

  const parameters = {
    mwUrl: 'https://en.vikidia.org',
    adminEmail: 'test@kiwix.org',
    outputDirectory: testId,
    redis: process.env.REDIS,
    articleList: 'Alaska',
  }

  test('right scrapping from vikidia.org', async () => {
    // Start from an empty Redis so the final "Redis has been cleared" check is meaningful.
    await execa('redis-cli flushall', { shell: true })

    try {
      const outFiles = await mwoffliner.execute(parameters)

      // Created 1 output
      expect(outFiles).toHaveLength(1)

      if (await zimcheckAvailable()) {
        await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
      } else {
        console.log('Zimcheck not installed, skipping test')
      }
    } finally {
      // Always remove the output directory, even when the scrape or an assertion
      // above fails, so failed runs do not leave `mwo-test-*` directories behind.
      rimraf.sync(`./${testId}`)
    }

    const redisScan = await execa('redis-cli --scan', { shell: true })
    // Redis has been cleared
    expect(redisScan.stdout).toEqual('')
  })
})

0 comments on commit 0431b73

Please sign in to comment.