Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move to libzim9 #1838

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# top-most EditorConfig file
root = true

# Defaults for every file: UTF-8, no trailing whitespace,
# Unix-style newlines with a newline ending every file
[*]
charset = utf-8
trim_trailing_whitespace = true
end_of_line = lf
insert_final_newline = true

# Tab indentation (no size specified)
[Makefile]
indent_style = tab

# C/C++ sources and headers
[*.{c,h,cpp,hpp}]
indent_size = 4

# JavaScript/TypeScript sources
[*.{js,ts}]
indent_size = 2

25,964 changes: 9,552 additions & 16,412 deletions package-lock.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
},
"dependencies": {
"@ladjs/country-language": "^1.0.3",
"@openzim/libzim": "2.4.4",
"@openzim/libzim": "3.0.0",
"@types/async": "^3.2.18",
"@types/backoff": "^2.5.2",
"@types/bluebird": "^3.5.38",
Expand Down Expand Up @@ -127,7 +127,7 @@
"eslint-plugin-jsdoc": "^40.1.0",
"eslint-plugin-prefer-arrow": "^1.2.3",
"eslint-plugin-prettier": "^4.2.1",
"file-type": "^18.2.1",
"file-type": "^18.4.0",
"jest": "^29.3.1",
"nyc": "^15.1.0",
"prettier": "2.8.7",
Expand Down
71 changes: 25 additions & 46 deletions src/mwoffliner.lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import urlParser from 'url'
import semver from 'semver'
import * as path from 'path'
import * as QueryStringParser from 'querystring'
import { ZimArticle, ZimCreator } from '@openzim/libzim'
import { Creator as ZimCreator, StringItem, Compression } from '@openzim/libzim'

import {
MAX_CPU_CORES,
Expand Down Expand Up @@ -369,24 +369,22 @@ async function execute(argv: any) {
Name: dump.computeFilenameRadical(false, true, true),
Flavour: dump.computeFlavour(),
...(dump.opts.customZimLongDescription ? { LongDescription: `${dump.opts.customZimLongDescription}` } : {}),
Scraper: `mwoffliner ${packageJSON.version}`,
}
validateMetadata(metadata)

const zimCreator = new ZimCreator(
{
fileName: outZim,
fullTextIndexLanguage: dump.opts.withoutZimFullTextIndex ? '' : dump.mwMetaData.langIso3,
welcome: dump.opts.mainPage ? dump.opts.mainPage : 'index',
compression: 'zstd',
},
metadata as any,
)
const scraperArticle = new ZimArticle({
ns: 'M',
data: `mwoffliner ${packageJSON.version}`,
url: 'Scraper',
})
zimCreator.addArticle(scraperArticle)
const zimCreator = new ZimCreator()
.configIndexing(true, dump.opts.withoutZimFullTextIndex ? '' : dump.mwMetaData.langIso3)
.configCompression(Compression.Zstd)
.startZimCreation(outZim)
zimCreator.setMainPath(dump.opts.mainPage ?? 'index')

// Write every metadata entry into the ZIM. An entry without a value is reported
// and skipped: calling toString() on undefined/null would throw a TypeError.
for (const [name, content] of Object.entries(metadata)) {
  if (content === undefined || content === null) {
    console.error(`Skipped adding metadata ${name}:[${content}]`)
    continue
  }
  zimCreator.addMetadata(name, content.toString())
}

logger.info('Copying Static Resource Files')
await saveStaticFiles(config, zimCreator)
Expand All @@ -399,8 +397,8 @@ async function execute(argv: any) {
const { finalCss } = await getAndProcessStylesheets(downloader, redisStore, stylesheetsToGet)
logger.log('Downloaded stylesheets')

const article = new ZimArticle({ url: `${config.output.dirs.mediawiki}/style.css`, data: finalCss, ns: '-' })
zimCreator.addArticle(article)
const item = new StringItem(`${config.output.dirs.mediawiki}/style.css`, 'text/css', '', {}, finalCss)
await zimCreator.addItem(item)
await saveFavicon(zimCreator, metaDataRequiredKeys['Illustration_48x48@1'])

await getThumbnailsData()
Expand Down Expand Up @@ -445,7 +443,7 @@ async function execute(argv: any) {
await writeArticleRedirects(downloader, dump, zimCreator)

logger.log('Finishing Zim Creation')
await zimCreator.finalise()
await zimCreator.finishZimCreation()

logger.log('Summary of scrape actions:', JSON.stringify(dump.status, null, '\t'))
}
Expand All @@ -458,19 +456,9 @@ async function execute(argv: any) {
await redirectsXId.iterateItems(downloader.speed, async (redirects) => {
for (const [redirectId, { targetId }] of Object.entries(redirects)) {
if (redirectId !== targetId) {
const redirectArticle = new ZimArticle({
url: redirectId,
shouldIndex: true,
data: '',
ns: 'A',
mimeType: 'text/html',

// We fake a title, by just removing the underscores
title: String(redirectId).replace(/_/g, ' '),

redirectUrl: targetId,
})
zimCreator.addArticle(redirectArticle)
// We fake a title, by just removing the underscores
const title = String(redirectId).replace(/_/g, ' ')
zimCreator.addRedirection(redirectId, title, targetId, { FRONT_ARTICLE: 1 })
dump.status.redirects.written += 1
}
}
Expand Down Expand Up @@ -519,8 +507,8 @@ async function execute(argv: any) {
/**
 * Adds the favicon to the ZIM as an item with path 'favicon' and mimetype 'image/png'.
 *
 * @param zimCreator - creator the item is added to
 * @param data - image bytes to store
 * @returns the value zimCreator.addItem() resolves to
 * @throws Error with message 'Failed to save favicon' when building or adding the item fails
 */
async function saveFavicon(zimCreator: ZimCreator, data: Buffer): Promise<any> {
  logger.log('Saving favicon.png...')
  try {
    // Hand the bytes over untouched: data.toString() would decode the PNG as
    // UTF-8 text and corrupt the image.
    // NOTE(review): relies on StringItem accepting a Buffer as content — confirm
    // against the @openzim/libzim typings.
    const item = new StringItem('favicon', 'image/png', '', {}, data)
    return await zimCreator.addItem(item)
  } catch (e) {
    throw new Error('Failed to save favicon')
  }
}
Expand Down Expand Up @@ -572,22 +560,13 @@ async function execute(argv: any) {
}

/* Write the static html file */
const article = new ZimArticle({ url: 'index', data: doc.documentElement.outerHTML, ns: 'A', mimeType: 'text/html', title: 'Main Page' })
return zimCreator.addArticle(article)
const item = new StringItem('index', 'text/html', 'Main Page', { FRONT_ARTICLE: 1 }, doc.documentElement.outerHTML)
return await zimCreator.addItem(item)
}

function createMainPageRedirect() {
logger.log(`Create main page redirection from [index] to [${'A/' + mainPage}]`)
const article = new ZimArticle({
url: 'index',
shouldIndex: true,
data: '',
ns: 'A',
mimeType: 'text/html',
title: mainPage,
redirectUrl: mainPage,
})
return zimCreator.addArticle(article)
return zimCreator.addRedirection('index', mainPage, mainPage, { FRONT_ARTICLE: 1 })
}

return mainPage ? createMainPageRedirect() : createMainPage()
Expand Down
32 changes: 14 additions & 18 deletions src/util/dump.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import Downloader from '../Downloader.js'
import { getFullUrl, jsPath, cssPath } from './index.js'
import { config } from '../config.js'
import MediaWiki from '../MediaWiki.js'
import { ZimCreator, ZimArticle } from '@openzim/libzim'
import { Creator as ZimCreator, StringItem } from '@openzim/libzim'
import { Dump } from '../Dump.js'
import fs from 'fs'
import { DO_PROPAGATION, ALL_READY_FUNCTION, WEBP_HANDLER_URL, LOAD_PHP, RULE_TO_REDIRECT } from './const.js'
Expand Down Expand Up @@ -140,8 +140,9 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, mw: MediaWik

try {
const articleId = type === 'js' ? jsPath(module, config.output.dirs.mediawiki) : cssPath(module, config.output.dirs.mediawiki)
const article = new ZimArticle({ url: articleId, data: text, ns: '-' })
zimCreator.addArticle(article)
const mimeType = type === 'js' ? 'application/javascript' : 'text/css'
const item = new StringItem(articleId, mimeType, '', {}, text)
await zimCreator.addItem(item)
logger.info(`Saved module [${module}]`)
} catch (e) {
logger.error(`Failed to get module with url [${moduleApiUrl}]\nYou may need to specify a custom --mwModulePath`, e)
Expand All @@ -151,17 +152,16 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, mw: MediaWik

// URLs should be kept the same as Kiwix JS relies on it.
export async function importPolyfillModules(zimCreator: ZimCreator) {
;[
const polyfills = [
{ name: 'webpHeroPolyfill', path: path.join(__dirname, '../../node_modules/webp-hero/dist-cjs/polyfills.js') },
{ name: 'webpHeroBundle', path: path.join(__dirname, '../../node_modules/webp-hero/dist-cjs/webp-hero.bundle.js') },
].forEach(({ name, path }) => {
const article = new ZimArticle({
url: jsPath(name),
data: fs.readFileSync(path, 'utf8').toString(),
ns: '-',
})
zimCreator.addArticle(article)
})
]

for (const { name, path } of polyfills) {
const data = fs.readFileSync(path, 'utf8').toString()
const item = new StringItem(jsPath(name), 'application/javascript', '', {}, data)
await zimCreator.addItem(item)
}

const content = await axios
.get(WEBP_HANDLER_URL, {
Expand All @@ -176,10 +176,6 @@ export async function importPolyfillModules(zimCreator: ZimCreator) {
throw new Error(`Failed to download webpHandler from [${WEBP_HANDLER_URL}]: ${err}`)
})

const article = new ZimArticle({
url: jsPath('webpHandler'),
data: content,
ns: '-',
})
zimCreator.addArticle(article)
// webpHandler is stored under a jsPath() URL like the other scripts, so declare
// it as JavaScript rather than leaving the mimetype empty.
const item = new StringItem(jsPath('webpHandler'), 'application/javascript', '', {}, content)
await zimCreator.addItem(item)
}
14 changes: 7 additions & 7 deletions src/util/misc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import mime from 'mime-types'
import mkdirp from 'mkdirp'
import os from 'os'
import pathParser from 'path'
import { ZimCreator, ZimArticle } from '@openzim/libzim'
import { Creator as ZimCreator, FileItem } from '@openzim/libzim'
import { Config, config } from '../config.js'
import * as logger from '../Logger.js'
import {
Expand Down Expand Up @@ -165,19 +165,19 @@ export function interpolateTranslationString(str: string, parameters: { [key: st
export function saveStaticFiles(config: Config, zimCreator: ZimCreator) {
const cssPromises = config.output.cssResources.concat(config.output.mainPageCssResources).map(async (css) => {
try {
const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${css}.css`))
const article = new ZimArticle({ url: cssPath(css), data: cssCont, ns: '-' })
zimCreator.addArticle(article)
const cssFilePath = pathParser.resolve(__dirname, `../../res/${css}.css`)
const item = new FileItem(cssPath(css), 'text/css', '', {}, cssFilePath)
await zimCreator.addItem(item)
} catch (error) {
logger.warn(`Could not create ${css} file : ${error}`)
}
})

const jsPromises = config.output.jsResources.map(async (js) => {
try {
const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${js}.js`))
const article = new ZimArticle({ url: jsPath(js), data: jsCont, ns: '-' })
zimCreator.addArticle(article)
const jsFilePath = pathParser.resolve(__dirname, `../../res/${js}.js`)
const item = new FileItem(jsPath(js), 'application/javascript', '', {}, jsFilePath)
await zimCreator.addItem(item)
} catch (error) {
logger.warn(`Could not create ${js} file : ${error}`)
}
Expand Down
37 changes: 17 additions & 20 deletions src/util/saveArticles.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import * as logger from '../Logger.js'
import Downloader from '../Downloader.js'
import MediaWiki from '../MediaWiki.js'
import { ZimArticle, ZimCreator } from '@openzim/libzim'
import { Creator as ZimCreator, StringItem } from '@openzim/libzim'
import htmlMinifier from 'html-minifier'
import * as QueryStringParser from 'querystring'

Expand Down Expand Up @@ -51,13 +51,12 @@ export async function downloadFiles(fileStore: RKVS<FileDetail>, retryStore: RKV
let isFailed = false
try {
if (resp.result && resp.result.content) {
const article = new ZimArticle({
url: resp.path,
data: resp.result.content,
ns: resp.namespace || 'I',
mimeType: resp.result.responseHeaders['content-type'],
})
zimCreator.addArticle(article)
const url = resp.path
const mimeType = resp.result.responseHeaders['content-type']
const data = resp.result.content
const item = new StringItem(url, mimeType, '', {}, data)
await zimCreator.addItem(item)

dump.status.files.success += 1
} else {
isFailed = true
Expand Down Expand Up @@ -218,16 +217,14 @@ async function saveArticle(

await redisStore.filesToDownloadXPath.setMany(filesToDownload)

const zimArticle = new ZimArticle({
url: articleId,
data: finalHTML,
ns: articleDetail.ns === 14 ? 'U' : 'A',
mimeType: 'text/html',
title: articleTitle,
shouldIndex: true,
})

zimCreator.addArticle(zimArticle)
const item = new StringItem(
articleId, // path / url
'text/html', // mimetype
articleTitle, // title
{ FRONT_ARTICLE: 1 }, // Hints
finalHTML, // Content
)
await zimCreator.addItem(item)

return null
} catch (err) {
Expand Down Expand Up @@ -368,8 +365,8 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade

logger.log(`Done with downloading a total of [${articlesTotal}] articles`)

const jsConfigVarArticle = new ZimArticle({ url: jsPath('jsConfigVars', config.output.dirs.mediawiki), data: jsConfigVars, ns: '-' })
zimCreator.addArticle(jsConfigVarArticle)
const jsConfigVarItem = new StringItem(jsPath('jsConfigVars', config.output.dirs.mediawiki), 'application/javascript', '', {}, jsConfigVars)
await zimCreator.addItem(jsConfigVarItem)

return {
jsModuleDependencies,
Expand Down
Loading