Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Download translations in bulk to prevent GlotPress throttling #2188

Merged
merged 8 commits into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/ghcr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ jobs:
with:
ref: ${{ github.event.release.tag_name }}

- uses: ./.github/actions/setup-node-env

- name: Download translation strings
run: pnpm i18n
env:
GLOTPRESS_USERNAME: ${{ secrets.MAKE_USERNAME }}
GLOTPRESS_PASSWORD: ${{ secrets.MAKE_LOGIN_PASSWORD }}

- uses: docker/setup-buildx-action@v2
with:
install: true
Expand Down
11 changes: 7 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,9 @@ selenium-debug.log
vercel.json
.eslintcache
.nuxt
src/locales/*.json
.nuxt-storybook
storybook-static
.vercel
/src/locales/scripts/valid-locales.json
/src/locales/scripts/untranslated-locales.json
/src/locales/scripts/wp-locales.json
.zshrc
.tcv-export

Expand All @@ -44,3 +40,10 @@ test/Default

# Ignore generated translation files
*.pot

# Ignore downloaded translation files
/src/locales/openverse.zip
/src/locales/*.json
/src/locales/scripts/valid-locales.json
/src/locales/scripts/untranslated-locales.json
/src/locales/scripts/wp-locales.json
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ ARG RELEASE

RUN echo "{\"release\":\"${RELEASE}\"}" > /home/node/app/src/static/version.json

RUN pnpm i18n
RUN pnpm build:only

###################
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@
"@typescript-eslint/parser": "^5.44.0",
"@vue/runtime-dom": "^3.2.37",
"@vue/test-utils": "^1.1.3",
"adm-zip": "^0.5.10",
"autoprefixer": "^10.4.0",
"axios-rate-limit": "^1.3.0",
"babel-jest": "^26.6.3",
Expand All @@ -170,6 +171,7 @@
"postcss": "^8.4.12",
"prettier": "^2.8.3",
"prettier-plugin-tailwindcss": "^0.2.2",
"qs": "^6.11.0",
"rimraf": "^3.0.2",
"tailwind-config-viewer": "^1.6.3",
"tailwindcss": "^3.2.4",
Expand Down
35 changes: 22 additions & 13 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

116 changes: 116 additions & 0 deletions src/locales/scripts/bulk-download.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
const { pipeline } = require("stream/promises")

const { createWriteStream } = require("fs")

const qs = require("qs")
const AdmZip = require("adm-zip")

const { writeLocaleFile } = require("./utils")
const axios = require("./axios")
const jed1xJsonToJson = require("./jed1x-json-to-json")

// Endpoint that receives the login form POST in `getAuthCookies`.
const LOGIN_URL = "https://login.wordpress.org/wp-login.php"
// Endpoint requested by `fetchBulkJed1x` to download the ZIP of translations;
// the export format is passed separately as a query parameter.
const BULK_DOWNLOAD_URL =
"https://translate.wordpress.org/exporter/meta/openverse/-do/"

/**
 * Given a username and password, log in to WordPress and get the
 * authentication cookies from the `Set-Cookie` header of the response.
 *
 * The login is considered successful only when the server answers with a 302
 * redirect and one of the cookies it sets mentions `wporg_logged_in`.
 *
 * @param log {string} - the username to log in with
 * @param pwd {string} - the password for the given username
 * @return {Promise<string[]>} - the `name=value` pair of every cookie in the
 *   `Set-Cookie` header, with the cookie attributes stripped
 * @throws {Error} if the response is not a 302 or lacks the login cookie
 */
const getAuthCookies = async (log, pwd) => {
  const res = await axios.post(
    LOGIN_URL,
    qs.stringify({
      log,
      pwd,
      rememberme: "forever",
      "wp-submit": "Log In",
      redirect_to: "https://make.wordpress.org/",
    }),
    {
      headers: { "content-type": "application/x-www-form-urlencoded" },
      // The cookies are read from the redirect response itself, so the
      // redirect must not be followed and no status may be treated as an error.
      maxRedirects: 0,
      validateStatus: () => true,
    }
  )

  // The header is absent when the server sets no cookies; fall back to an
  // empty list so that case is reported as a failed authentication instead of
  // crashing with a `TypeError`.
  const setCookies = res.headers["set-cookie"] ?? []
  const isLoggedIn =
    res.status === 302 &&
    setCookies.some((cookie) => cookie.includes("wporg_logged_in"))
  if (!isLoggedIn) {
    throw new Error(`Authentication failed: server returned ${res.status}`)
  }

  // Keep only the leading `name=value` pair. Splitting also handles cookies
  // that carry no attributes (and therefore contain no ";").
  return setCookies.map((cookie) => cookie.split(";")[0])
}

/**
 * Fetch the ZIP of translation strings from GlotPress, using the given
 * authentication cookies to access the page, and save it as
 * `src/locales/openverse.zip` under the current working directory.
 *
 * @param cookies {string[]} - the cookies to authenticate the ZIP download
 * @return {Promise<string>} - the path to the downloaded ZIP file
 */
const fetchBulkJed1x = async (cookies) => {
  const res = await axios.get(BULK_DOWNLOAD_URL, {
    // Cookie pairs in a `Cookie` header are separated by "; " (RFC 6265).
    headers: { cookie: cookies.join("; ") },
    params: { "export-format": "jed1x" },
    // Stream the body so the archive is piped to disk rather than buffered.
    responseType: "stream",
  })
  const destPath = `${process.cwd()}/src/locales/openverse.zip`
  await pipeline(res.data, createWriteStream(destPath))
  return destPath
}

/**
 * Process every JSON file contained in the given ZIP archive. Each entry is
 * parsed, converted with `jed1xJsonToJson` and handed to `writeLocaleFile`
 * under its locale code, i.e. the entry's file name without the
 * `meta-openverse-` prefix and the `.jed.json` suffix.
 *
 * @param zipPath {string} - the path to the ZIP file to extract
 * @return {Promise<unknown[]>} - the results of all `writeLocaleFile` calls
 */
const extractZip = async (zipPath) => {
  const archive = new AdmZip(zipPath, undefined)

  // Convert every entry before writing anything, so a malformed entry aborts
  // the extraction before the first locale file is written.
  const localePairs = []
  for (const entry of archive.getEntries()) {
    if (!entry.entryName.endsWith(".json")) {
      continue
    }
    const parsed = JSON.parse(archive.readAsText(entry))
    const messages = jed1xJsonToJson(parsed)
    const withoutPrefix = entry.name.replace("meta-openverse-", "")
    const localeCode = withoutPrefix.replace(".jed.json", "")
    localePairs.push([localeCode, messages])
  }

  const pendingWrites = localePairs.map(([localeCode, messages]) =>
    writeLocaleFile(localeCode, messages)
  )
  const outcomes = await Promise.all(pendingWrites)
  return outcomes
}

/**
 * Perform a bulk download of translation strings from GlotPress and extract
 * the JSON files from the ZIP archive.
 *
 * The credentials are read from the `GLOTPRESS_USERNAME` and
 * `GLOTPRESS_PASSWORD` environment variables.
 *
 * @return {Promise<void>} - resolves once all translations have been saved
 * @throws {Error} if either credential is missing; errors raised while
 *   logging in, downloading or extracting propagate to the caller
 */
const bulkDownload = async () => {
  console.log("Performing bulk download.")

  const { GLOTPRESS_USERNAME: username, GLOTPRESS_PASSWORD: password } =
    process.env
  if (!username || !password) {
    console.log("Auth credentials not found, bulk download cancelled.")
    throw new Error("Bulk download cancelled")
  }

  // Each step consumes the result of the previous one: log in, download the
  // archive with the session cookies, then unpack it.
  const sessionCookies = await getAuthCookies(username, password)
  const archivePath = await fetchBulkJed1x(sessionCookies)
  const saved = await extractZip(archivePath)
  console.log(`Successfully saved ${saved.length} translations.`)
}

module.exports = bulkDownload
Loading