Skip to content

Commit

Permalink
Merge pull request #1783 from openzim/vikidia-fixes
Browse files Browse the repository at this point in the history
fixes for vikidia.org to avoid error "unable to hack startup module"
  • Loading branch information
kelson42 authored Feb 18, 2023
2 parents 89abe74 + 2c75a30 commit 71ab578
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ const config = {
],
js: [
'startup',
'jquery',
'jquery,mediawiki',
'mediawiki.base',
'mediawiki.util',
'site',
Expand Down
1 change: 1 addition & 0 deletions src/util/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@ export const WEBP_CANDIDATE_IMAGE_MIME_TYPE = /image+[/]+(jpeg|png)/
// Default wiki path value. NOTE(review): consumers are not visible here; presumably the article path prefix on the MediaWiki host, confirm against callers.
export const DEFAULT_WIKI_PATH = 'wiki/'
// Matches the literal header `function allReady( modules ) {` (exact spacing required) in the startup module source.
export const ALL_READY_FUNCTION = /function allReady\( modules \) {/
// Matches the literal call `mw.requestIdleCallback( doPropagation, { timeout: 1 } );` (exact spacing required).
export const DO_PROPAGATION = /mw\.requestIdleCallback\( doPropagation, \{ timeout: 1 \} \);/
// Matches an assignment of the form `script.src = "...load.php...";` on a single line.
// NOTE(review): both `.*` are greedy, so on a long (e.g. minified) line the match extends to the LAST `";` on that line;
// the `.` in `script.src` is also unescaped and matches any character. Confirm this is acceptable for all targeted wikis.
export const LOAD_PHP = /script.src = ".*load\.php.*";/
// URL of a `web-handler.js` script pinned to a specific gist revision. NOTE(review): presumably the client-side WebP fallback handler; confirm.
export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js'
// Retry limit for file downloads. NOTE(review): whether this counts total attempts or retries after the first attempt is not visible here.
export const MAX_FILE_DOWNLOAD_RETRIES = 5
6 changes: 3 additions & 3 deletions src/util/dump.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import MediaWiki from '../MediaWiki.js'
import { ZimCreator, ZimArticle } from '@openzim/libzim'
import { Dump } from '../Dump.js'
import fs from 'fs'
import { DO_PROPAGATION, ALL_READY_FUNCTION, WEBP_HANDLER_URL } from './const.js'
import { DO_PROPAGATION, ALL_READY_FUNCTION, WEBP_HANDLER_URL, LOAD_PHP } from './const.js'
import * as path from 'path'
import { fileURLToPath } from 'url'

Expand Down Expand Up @@ -101,11 +101,11 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, mw: MediaWik
// it also removes requestIdleCallback as in our case window is idle after all script tags are called but those script tags
// will require the functions which would have been loaded by doPropagation.
// Patches the startup module's JavaScript source through a chain of regex replacements and returns the patched source.
// Throws Error('unable to hack startup module') when the guard below decides the expected patterns are missing.
// NOTE(review): this span is a rendered diff. Each changed statement appears twice: the pre-commit line first,
// immediately followed by its post-commit replacement. Only one line of each pair exists in the real file.
function hackStartUpModule(jsCode: string) {
// Pre-commit guard: rejects the source as soon as either ALL_READY_FUNCTION or DO_PROPAGATION is absent.
if (!ALL_READY_FUNCTION.test(jsCode) || !DO_PROPAGATION.test(jsCode)) {
// Post-commit guard: rejects only when one of those two patterns is absent AND LOAD_PHP is absent as well,
// i.e. a source containing a `script.src = "...load.php...";` assignment is always accepted.
if ((!ALL_READY_FUNCTION.test(jsCode) || !DO_PROPAGATION.test(jsCode)) && !LOAD_PHP.test(jsCode)) {
throw new Error('unable to hack startup module')
}

// Pre-commit return: calls doPropagation() directly instead of via requestIdleCallback, and makes allReady() return true immediately.
return jsCode.replace(DO_PROPAGATION, 'doPropagation();').replace(ALL_READY_FUNCTION, 'function allReady( modules ) { return true;')
// Post-commit return: same two replacements, plus blanking the load.php script URL.
// None of the patterns use the `g` flag, so each replace() rewrites the first match only; a pattern with no match leaves the string unchanged.
return jsCode.replace(DO_PROPAGATION, 'doPropagation();').replace(ALL_READY_FUNCTION, 'function allReady( modules ) { return true;').replace(LOAD_PHP, 'script.src ="";')
}

let apiParameterOnly
Expand Down
43 changes: 43 additions & 0 deletions test/e2e/vikidia.e2e.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import * as mwoffliner from '../../src/mwoffliner.lib.js'
import { execa } from 'execa'
import rimraf from 'rimraf'
import { zimcheckAvailable, zimcheck } from '../util.js'
import 'dotenv/config.js'
import { jest } from '@jest/globals'

// The scrape hits the live wiki over the network, so allow up to 200 s per test.
jest.setTimeout(200000)

/**
 * End-to-end test: scrape a single article from en.vikidia.org and check that
 * exactly one output file is produced, that it passes zimcheck (when the tool
 * is installed), and that no keys are left behind in Redis.
 */
describe('vikidia', () => {
  const now = new Date()
  // Unique per-run identifier, also used as the output directory name.
  const testId = `mwo-test-${+now}`

  const parameters = {
    mwUrl: 'https://en.vikidia.org',
    adminEmail: 'test@kiwix.org',
    outputDirectory: testId,
    redis: process.env.REDIS,
    articleList: 'Alaska',
  }

  test('right scrapping from vikidia.org', async () => {
    // Start from an empty Redis so the final scan only reflects this run.
    await execa('redis-cli flushall', { shell: true })

    try {
      const outFiles = await mwoffliner.execute(parameters)

      // Created 1 output
      expect(outFiles).toHaveLength(1)

      if (await zimcheckAvailable()) {
        await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
      } else {
        console.log('Zimcheck not installed, skipping test')
      }
    } finally {
      // Always remove the output directory, even when the scrape or an
      // assertion above fails, so failed runs do not leave mwo-test-* dirs behind.
      rimraf.sync(`./${testId}`)
    }

    const redisScan = await execa('redis-cli --scan', { shell: true })
    // Redis has been cleared
    expect(redisScan.stdout).toEqual('')
  })
})

0 comments on commit 71ab578

Please sign in to comment.