Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move to libzim9 #1838

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
# EditorConfig is awesome: https://EditorConfig.org

# top-most EditorConfig file
root = true

# Unix-style newlines with a newline ending every file
[*]
charset = utf-8
indent_style = space
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
insert_final_newline = true


# Tab indentation (no size specified)
[Makefile]
indent_style = tab

# C/C++ sources and headers are indented with 4 spaces
[*.{c,h,cpp,hpp}]
indent_size = 4

15,689 changes: 5,799 additions & 9,890 deletions package-lock.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
"dependencies": {
"@aws-sdk/client-s3": "^3.374.0",
"@ladjs/country-language": "^1.0.3",
"@openzim/libzim": "2.4.4",
"@openzim/libzim": "3.0.0",
"@types/async": "^3.2.18",
"@types/backoff": "^2.5.2",
"@types/bluebird": "^3.5.38",
Expand Down Expand Up @@ -127,7 +127,7 @@
"eslint-plugin-jsdoc": "^40.1.0",
"eslint-plugin-prefer-arrow": "^1.2.3",
"eslint-plugin-prettier": "^4.2.1",
"file-type": "^18.2.1",
"file-type": "^18.4.0",
"jest": "^29.3.1",
"nyc": "^15.1.0",
"prettier": "2.8.7",
Expand Down
71 changes: 25 additions & 46 deletions src/mwoffliner.lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import semver from 'semver'
import * as path from 'path'
import * as QueryStringParser from 'querystring'
import { ZimArticle, ZimCreator } from '@openzim/libzim'

Check failure on line 19 in src/mwoffliner.lib.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

Module '"@openzim/libzim"' has no exported member 'ZimArticle'.

Check failure on line 19 in src/mwoffliner.lib.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

'"@openzim/libzim"' has no exported member named 'ZimCreator'. Did you mean 'Creator'?

Check warning on line 19 in src/mwoffliner.lib.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

'ZimArticle' is defined but never used
import { checkApiAvailability } from './util/mw-api.js'

import {
Expand Down Expand Up @@ -379,24 +379,22 @@
Name: dump.computeFilenameRadical(false, true, true),
Flavour: dump.computeFlavour(),
...(dump.opts.customZimLongDescription ? { LongDescription: `${dump.opts.customZimLongDescription}` } : {}),
Scraper: `mwoffliner ${packageJSON.version}`,
}
validateMetadata(metadata)

const zimCreator = new ZimCreator(
{
fileName: outZim,
fullTextIndexLanguage: dump.opts.withoutZimFullTextIndex ? '' : dump.mwMetaData.langIso3,
welcome: dump.opts.mainPage ? dump.opts.mainPage : 'index',
compression: 'zstd',
},
metadata as any,
)
const scraperArticle = new ZimArticle({
ns: 'M',
data: `mwoffliner ${packageJSON.version}`,
url: 'Scraper',
})
zimCreator.addArticle(scraperArticle)
const zimCreator = new ZimCreator()
.configIndexing(true, dump.opts.withoutZimFullTextIndex ? '' : dump.mwMetaData.langIso3)
.configCompression(Compression.Zstd)

Check failure on line 388 in src/mwoffliner.lib.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

Cannot find name 'Compression'.
.startZimCreation(outZim)
zimCreator.setMainPath(dump.opts.mainPage ?? 'index')

// Copy every metadata entry into the ZIM file. Entries without a value are
// reported and skipped: calling toString() on null/undefined would throw.
for (const [name, content] of Object.entries(metadata)) {
  if (content === undefined || content === null) {
    console.error(`Skipped adding metadata ${name}:[${content}]`)
    continue
  }
  zimCreator.addMetadata(name, content.toString())
}

logger.info('Copying Static Resource Files')
await saveStaticFiles(config, zimCreator)
Expand All @@ -409,8 +407,8 @@
const { finalCss } = await getAndProcessStylesheets(downloader, stylesheetsToGet)
logger.log('Downloaded stylesheets')

const article = new ZimArticle({ url: `${config.output.dirs.mediawiki}/style.css`, data: finalCss, ns: '-' })
zimCreator.addArticle(article)
const item = new StringItem(`${config.output.dirs.mediawiki}/style.css`, 'text/css', '', {}, finalCss)

Check failure on line 410 in src/mwoffliner.lib.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

Cannot find name 'StringItem'.
await zimCreator.addItem(item)
await saveFavicon(zimCreator, metaDataRequiredKeys['Illustration_48x48@1'])

await getThumbnailsData()
Expand Down Expand Up @@ -455,7 +453,7 @@
await writeArticleRedirects(downloader, dump, zimCreator)

logger.log('Finishing Zim Creation')
await zimCreator.finalise()
await zimCreator.finishZimCreation()

logger.log('Summary of scrape actions:', JSON.stringify(dump.status, null, '\t'))
}
Expand All @@ -468,19 +466,9 @@
await redirectsXId.iterateItems(downloader.speed, async (redirects) => {
for (const [redirectId, { targetId }] of Object.entries(redirects)) {
if (redirectId !== targetId) {
const redirectArticle = new ZimArticle({
url: redirectId,
shouldIndex: true,
data: '',
ns: 'A',
mimeType: 'text/html',

// We fake a title, by just removing the underscores
title: String(redirectId).replace(/_/g, ' '),

redirectUrl: targetId,
})
zimCreator.addArticle(redirectArticle)
// We fake a title, by just removing the underscores
const title = String(redirectId).replace(/_/g, ' ')
zimCreator.addRedirection(redirectId, title, targetId, { FRONT_ARTICLE: 1 })
dump.status.redirects.written += 1
}
}
Expand Down Expand Up @@ -533,8 +521,8 @@
async function saveFavicon(zimCreator: ZimCreator, data: Buffer): Promise<any> {
logger.log('Saving favicon.png...')
try {
const article = new ZimArticle({ url: 'favicon', mimeType: 'image/png', data, ns: '-' })
return zimCreator.addArticle(article)
const item = new StringItem('favicon', 'image/png', '', {}, data)

Check failure on line 524 in src/mwoffliner.lib.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

Cannot find name 'StringItem'.
return await zimCreator.addItem(item)
} catch (e) {
throw new Error('Failed to save favicon')
}
Expand Down Expand Up @@ -586,22 +574,13 @@
}

/* Write the static html file */
const article = new ZimArticle({ url: 'index', data: doc.documentElement.outerHTML, ns: 'A', mimeType: 'text/html', title: 'Main Page' })
return zimCreator.addArticle(article)
const item = new StringItem('index', 'text/html', 'Main Page', { FRONT_ARTICLE: 1 }, doc.documentElement.outerHTML)

Check failure on line 577 in src/mwoffliner.lib.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

Cannot find name 'StringItem'.
return await zimCreator.addItem(item)
}

function createMainPageRedirect() {
logger.log(`Create main page redirection from [index] to [${'A/' + mainPage}]`)
const article = new ZimArticle({
url: 'index',
shouldIndex: true,
data: '',
ns: 'A',
mimeType: 'text/html',
title: mainPage,
redirectUrl: mainPage,
})
return zimCreator.addArticle(article)
logger.log(`Create main page redirection from [index] to [${mainPage}]`)
return zimCreator.addRedirection('index', mainPage, mainPage, { FRONT_ARTICLE: 1 })
}

return mainPage ? createMainPageRedirect() : createMainPage()
Expand Down
32 changes: 14 additions & 18 deletions src/util/dump.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import RedisStore from '../RedisStore.js'
import { getFullUrl, jsPath, cssPath } from './index.js'
import { config } from '../config.js'
import MediaWiki from '../MediaWiki.js'
import { ZimCreator, ZimArticle } from '@openzim/libzim'
import { Creator as ZimCreator, StringItem } from '@openzim/libzim'
import { Dump } from '../Dump.js'
import fs from 'fs'
import { DO_PROPAGATION, ALL_READY_FUNCTION, WEBP_HANDLER_URL, LOAD_PHP, RULE_TO_REDIRECT } from './const.js'
Expand Down Expand Up @@ -142,8 +142,9 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader:

try {
const articleId = type === 'js' ? jsPath(module, config.output.dirs.mediawiki) : cssPath(module, config.output.dirs.mediawiki)
const article = new ZimArticle({ url: articleId, data: text, ns: '-' })
zimCreator.addArticle(article)
const mimeType = type === 'js' ? 'application/javascript' : 'text/css'
const item = new StringItem(articleId, mimeType, '', {}, text)
await zimCreator.addItem(item)
logger.info(`Saved module [${module}]`)
} catch (e) {
logger.error(`Failed to get module with url [${moduleApiUrl}]\nYou may need to specify a custom --mwModulePath`, e)
Expand All @@ -153,17 +154,16 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader:

// URLs should be kept the same, as Kiwix JS relies on them.
export async function importPolyfillModules(zimCreator: ZimCreator) {
;[
const polyfills = [
{ name: 'webpHeroPolyfill', path: path.join(__dirname, '../../node_modules/webp-hero/dist-cjs/polyfills.js') },
{ name: 'webpHeroBundle', path: path.join(__dirname, '../../node_modules/webp-hero/dist-cjs/webp-hero.bundle.js') },
].forEach(({ name, path }) => {
const article = new ZimArticle({
url: jsPath(name),
data: fs.readFileSync(path, 'utf8').toString(),
ns: '-',
})
zimCreator.addArticle(article)
})
]

for (const { name, path } of polyfills) {
const data = fs.readFileSync(path, 'utf8').toString()
const item = new StringItem(jsPath(name), 'application/javascript', '', {}, data)
await zimCreator.addItem(item)
}

const content = await axios
.get(WEBP_HANDLER_URL, {
Expand All @@ -178,10 +178,6 @@ export async function importPolyfillModules(zimCreator: ZimCreator) {
throw new Error(`Failed to download webpHandler from [${WEBP_HANDLER_URL}]: ${err}`)
})

const article = new ZimArticle({
url: jsPath('webpHandler'),
data: content,
ns: '-',
})
zimCreator.addArticle(article)
// The downloaded handler is a script, so declare the JavaScript MIME type
// (same value the webp-hero polyfill items use) instead of an empty one.
const item = new StringItem(jsPath('webpHandler'), 'application/javascript', '', {}, content)
await zimCreator.addItem(item)
}
16 changes: 8 additions & 8 deletions src/util/misc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import mime from 'mime-types'
import mkdirp from 'mkdirp'
import os from 'os'
import pathParser from 'path'
import { ZimCreator, ZimArticle } from '@openzim/libzim'
import { Creator as ZimCreator, FileItem } from '@openzim/libzim'
import { Config, config } from '../config.js'
import * as logger from '../Logger.js'
import {
Expand Down Expand Up @@ -165,19 +165,19 @@ export function interpolateTranslationString(str: string, parameters: { [key: st
export function saveStaticFiles(config: Config, zimCreator: ZimCreator) {
const cssPromises = config.output.cssResources.concat(config.output.mainPageCssResources).map(async (css) => {
try {
const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${css}.css`))
const article = new ZimArticle({ url: cssPath(css), data: cssCont, ns: '-' })
zimCreator.addArticle(article)
const cssFilePath = pathParser.resolve(__dirname, `../../res/${css}.css`)
const item = new FileItem(cssPath(css), 'text/css', '', {}, cssFilePath)
await zimCreator.addItem(item)
} catch (error) {
logger.warn(`Could not create ${css} file : ${error}`)
}
})

const jsPromises = config.output.jsResources.map(async (js) => {
try {
const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${js}.js`))
const article = new ZimArticle({ url: jsPath(js), data: jsCont, ns: '-' })
zimCreator.addArticle(article)
const jsFilePath = pathParser.resolve(__dirname, `../../res/${js}.js`)
const item = new FileItem(jsPath(js), 'application/javascript', '', {}, jsFilePath)
await zimCreator.addItem(item)
} catch (error) {
logger.warn(`Could not create ${js} file : ${error}`)
}
Expand Down Expand Up @@ -324,7 +324,7 @@ export function deDup<T>(_arr: T[], getter: (o: T) => any) {
})
}

export function getRelativeFilePath(parentArticleId: string, fileBase: string, resourceNamespace: 'I' | 'A' | 'M' | '-') {
export function getRelativeFilePath(parentArticleId: string, fileBase: string, resourceNamespace: 'I' | 'M' | '-') {
const slashesInUrl = parentArticleId.split('/').length - 1
const upStr = '../'.repeat(slashesInUrl + 1)
const newUrl = `${upStr}${resourceNamespace}/` + fileBase
Expand Down
39 changes: 19 additions & 20 deletions src/util/saveArticles.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import * as logger from '../Logger.js'
import Downloader from '../Downloader.js'
import { Creator as ZimCreator, StringItem } from '@openzim/libzim'
import htmlMinifier from 'html-minifier'

Check warning on line 4 in src/util/saveArticles.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

'htmlMinifier' is defined but never used
import * as QueryStringParser from 'querystring'

Check warning on line 5 in src/util/saveArticles.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

'QueryStringParser' is defined but never used
import RedisStore from '../RedisStore.js'
import { ZimArticle, ZimCreator } from '@openzim/libzim'

import pmap from 'p-map'
import * as domino from 'domino'
Expand Down Expand Up @@ -38,13 +40,12 @@
let isFailed = false
try {
if (resp.result && resp.result.content) {
const article = new ZimArticle({
url: resp.path,
data: resp.result.content,
ns: resp.namespace || 'I',
mimeType: resp.result.responseHeaders['content-type'],
})
zimCreator.addArticle(article)
const url = resp.path
const mimeType = resp.result.responseHeaders['content-type']
const data = resp.result.content
const item = new StringItem(url, mimeType, '', {}, data)
await zimCreator.addItem(item)

dump.status.files.success += 1
} else {
isFailed = true
Expand Down Expand Up @@ -197,7 +198,7 @@
subtitles: any,
articleId: string,
articleTitle: string,
articleDetail: any,

Check warning on line 201 in src/util/saveArticles.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

'articleDetail' is defined but never used
): Promise<Error> {
try {
const filesToDownload: KVS<FileDetail> = {}
Expand Down Expand Up @@ -225,16 +226,14 @@

await RedisStore.filesToDownloadXPath.setMany(filesToDownload)

const zimArticle = new ZimArticle({
url: articleId,
data: finalHTML,
ns: articleDetail.ns === 14 ? 'U' : 'A',
mimeType: 'text/html',
title: articleTitle,
shouldIndex: true,
})

zimCreator.addArticle(zimArticle)
const item = new StringItem(
articleId, // path / url
'text/html', // mimetype
articleTitle, // title
{ FRONT_ARTICLE: 1 }, // Hints
finalHTML, // Content
)
await zimCreator.addItem(item)

return null
} catch (err) {
Expand Down Expand Up @@ -423,8 +422,8 @@

logger.log(`Done with downloading a total of [${articlesTotal}] articles`)

const jsConfigVarArticle = new ZimArticle({ url: jsPath('jsConfigVars', config.output.dirs.mediawiki), data: jsConfigVars, ns: '-' })
zimCreator.addArticle(jsConfigVarArticle)
const jsConfigVarItem = new StringItem(jsPath('jsConfigVars', config.output.dirs.mediawiki), 'application/javascript', '', {}, jsConfigVars)
await zimCreator.addItem(jsConfigVarItem)

return {
jsModuleDependencies,
Expand Down
Loading
Loading