From c3b626645f38279da68949bbe9f0f7f900185350 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Thu, 3 Oct 2024 08:23:40 +0000 Subject: [PATCH] Add pages to the ZIM and display them in the UI --- .github/workflows/Tests.yml | 2 +- CONTRIBUTING.md | 6 +-- scraper/src/libretexts2zim/client.py | 45 +++++++++++++++-- scraper/src/libretexts2zim/processor.py | 48 ++++++++++++------ scraper/src/libretexts2zim/ui.py | 12 ++++- scraper/tests-integration/test_client.py | 5 ++ scraper/tests-integration/test_zim_content.py | 36 +++++++------ scraper/tests/test_processor.py | 38 +++++++++----- zimui/cypress/e2e/home.cy.ts | 14 +++--- zimui/cypress/fixtures/home.json | 1 - zimui/cypress/fixtures/page_content_123.json | 3 ++ zimui/cypress/fixtures/shared.json | 12 ++++- zimui/public/.gitignore | 1 + .../components/__tests__/HeaderBar.spec.ts | 2 +- zimui/src/router/index.ts | 2 +- zimui/src/stores/home.ts | 43 ---------------- zimui/src/stores/main.ts | 30 ++++++++++- zimui/src/types/home.ts | 3 -- zimui/src/types/shared.ts | 11 ++++ zimui/src/views/HomeView.vue | 50 +++++++++++++++---- 20 files changed, 243 insertions(+), 121 deletions(-) delete mode 100644 zimui/cypress/fixtures/home.json create mode 100644 zimui/cypress/fixtures/page_content_123.json delete mode 100644 zimui/src/stores/home.ts delete mode 100644 zimui/src/types/home.ts diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml index 6451584..9e7e367 100644 --- a/.github/workflows/Tests.yml +++ b/.github/workflows/Tests.yml @@ -102,7 +102,7 @@ jobs: docker build -t libretexts2zim . 
- name: Run scraper - run: docker run -v $PWD/output:/output libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo" + run: docker run -v $PWD/output:/output libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --root-page-id 28207 --file-name-format "tests_en_libretexts-geo" - name: Run integration test suite run: docker run -v $PWD/scraper/tests-integration:/src/scraper/tests-integration -v $PWD/output:/output -e ZIM_FILE_PATH=/output/tests_en_libretexts-geo.zim libretexts2zim bash -c "pip install pytest; pytest -v /src/scraper/tests-integration" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ecdcf70..4205369 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,7 +8,7 @@ See [README](README.md) for details about how to install with hatch virtualenv. ### Developing the ZIM UI in Vue.JS -When you want to alter something in the ZIM UI in Vue.JS, you need assets which are generated by the scraper (e.g. home.json, ...). +When you want to alter something in the ZIM UI in Vue.JS, you need assets which are generated by the scraper (e.g. shared.json, ...). To simplify this, it is possible to: @@ -24,10 +24,10 @@ To achieve this, first build the Docker image based on current code base. docker build -t local-libretexts2zim . ``` -Scrape a library (here we use the [GeoSciences](https://geo.libretexts.org) library, but you could use any other one of interest for your UI developments). +Scrape a library (here we use the [Geosciences](https://geo.libretexts.org) library, but you could use any other one of interest for your UI developments). 
``` -docker run --rm -it -v "$PWD/output":/output local-libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo" +docker run --rm -it -v "$PWD/output":/output local-libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo" --overwrite ``` Extract interesting ZIM content and move it to `public` folder. diff --git a/scraper/src/libretexts2zim/client.py b/scraper/src/libretexts2zim/client.py index 99bd7b7..a08952d 100644 --- a/scraper/src/libretexts2zim/client.py +++ b/scraper/src/libretexts2zim/client.py @@ -32,12 +32,13 @@ class LibraryPage(BaseModel): id: LibraryPageId title: str + path: str parent: "LibraryPage | None" = None children: list["LibraryPage"] = [] def __repr__(self) -> str: return ( - f"WikiPage(id='{self.id}', title='{self.title}', " + f"WikiPage(id='{self.id}', title='{self.title}', path='{self.path}' " f"parent='{'None' if not self.parent else self.parent.id}', " f"children='{','.join([child.id for child in self.children])}')" ) @@ -52,6 +53,12 @@ def self_and_parents(self) -> list["LibraryPage"]: return result +class LibraryPageContent(BaseModel): + """Content of a given library page""" + + html_body: str + + class LibraryTree(BaseModel): """Class holding information about the tree of pages on a given library""" @@ -246,14 +253,19 @@ def get_page_tree(self) -> LibraryTree: ) root = LibraryPage( - id=tree_data["page"]["@id"], title=tree_data["page"]["title"] + id=tree_data["page"]["@id"], + title=tree_data["page"]["title"], + path=tree_data["page"]["path"]["#text"], ) tree_obj = LibraryTree(root=root) tree_obj.pages[root.id] = root def _add_page(page_node: Any, parent: LibraryPage) -> LibraryPage: page = LibraryPage( - id=page_node["@id"], title=page_node["title"], parent=parent + id=page_node["@id"], + title=page_node["title"], + path=page_node["path"]["#text"], + parent=parent, ) parent.children.append(page) 
tree_obj.pages[page.id] = page @@ -274,6 +286,33 @@ def _process_tree_data(page_node: Any, parent: LibraryPage) -> None: return tree_obj + def get_page_content(self, page: LibraryPage) -> LibraryPageContent: + """Returns the content of a given page""" + + tree = self._get_api_json( + f"/pages/{page.id}/contents", timeout=HTTP_TIMEOUT_NORMAL_SECONDS + ) + if not isinstance(tree["body"][0], str): + raise LibreTextsParsingError( + f"First body element of /pages/{page.id}/contents is not a string" + ) + if not isinstance(tree["body"][1], dict): + raise LibreTextsParsingError( + f"Second body element of /pages/{page.id}/contents is not a dict" + ) + if "@target" not in tree["body"][1]: + raise LibreTextsParsingError( + f"Unexpected second body element of /pages/{page.id}/contents, " + "no @target property" + ) + if tree["body"][1]["@target"] != "toc": + raise LibreTextsParsingError( + f"Unexpected second body element of /pages/{page.id}/contents, " + f"@target property is '{tree["body"][1]["@target"]}' while only 'toc' " + "is expected" + ) + return LibraryPageContent(html_body=tree["body"][0]) + def _get_soup(content: str) -> BeautifulSoup: """Return a BeautifulSoup soup from textual content diff --git a/scraper/src/libretexts2zim/processor.py b/scraper/src/libretexts2zim/processor.py index 4862afb..93a6bad 100644 --- a/scraper/src/libretexts2zim/processor.py +++ b/scraper/src/libretexts2zim/processor.py @@ -21,7 +21,12 @@ LibreTextsMetadata, ) from libretexts2zim.constants import LANGUAGE_ISO_639_3, NAME, ROOT_DIR, VERSION, logger -from libretexts2zim.ui import ConfigModel, HomeModel, SharedModel +from libretexts2zim.ui import ( + ConfigModel, + PageContentModel, + PageModel, + SharedModel, +) from libretexts2zim.zimconfig import ZimConfig @@ -260,20 +265,6 @@ def run(self) -> Path: stream_file(home.welcome_image_url, byte_stream=welcome_image) add_item_for(creator, "content/logo.png", content=welcome_image.getvalue()) del welcome_image - add_item_for( - creator, - 
"content/shared.json", - content=SharedModel(logo_path="content/logo.png").model_dump_json( - by_alias=True - ), - ) - add_item_for( - creator, - "content/home.json", - content=HomeModel( - welcome_text_paragraphs=home.welcome_text_paragraphs - ).model_dump_json(by_alias=True), - ) logger.info(f"Adding Vue.JS UI files in {self.zimui_dist}") for file in self.zimui_dist.rglob("*"): @@ -287,7 +278,8 @@ def run(self) -> Path: creator=creator, path=path, content=index_html_path.read_text(encoding="utf-8").replace( - "Vite App", formatted_config.title_format + "Vite App", + f"{formatted_config.title_format}", ), mimetype="text/html", is_front=True, @@ -307,5 +299,29 @@ def run(self) -> Path: f"{len(selected_pages)} pages (out of {len(pages_tree.pages)}) will be " "fetched and pushed to the ZIM" ) + add_item_for( + creator, + "content/shared.json", + content=SharedModel( + logo_path="content/logo.png", + root_page_path=selected_pages[0].path, # root is always first + pages=[ + PageModel(id=page.id, title=page.title, path=page.path) + for page in selected_pages + ], + ).model_dump_json(by_alias=True), + ) + + logger.info("Fetching pages content") + for page in selected_pages: + logger.debug(f" Fetching {page.id}") + page_content = self.libretexts_client.get_page_content(page) + add_item_for( + creator, + f"content/page_content_{page.id}.json", + content=PageContentModel( + html_body=page_content.html_body + ).model_dump_json(by_alias=True), + ) return zim_path diff --git a/scraper/src/libretexts2zim/ui.py b/scraper/src/libretexts2zim/ui.py index cf7864a..085216e 100644 --- a/scraper/src/libretexts2zim/ui.py +++ b/scraper/src/libretexts2zim/ui.py @@ -8,12 +8,20 @@ class CamelModel(BaseModel): model_config = ConfigDict(alias_generator=camelize, populate_by_name=True) -class HomeModel(CamelModel): - welcome_text_paragraphs: list[str] +class PageModel(CamelModel): + id: str + title: str + path: str + + +class PageContentModel(CamelModel): + html_body: str class 
SharedModel(CamelModel): logo_path: str + root_page_path: str + pages: list[PageModel] class ConfigModel(CamelModel): diff --git a/scraper/tests-integration/test_client.py b/scraper/tests-integration/test_client.py index 5217470..e6a0251 100644 --- a/scraper/tests-integration/test_client.py +++ b/scraper/tests-integration/test_client.py @@ -129,3 +129,8 @@ def test_get_home_welcome_text_paragraphs( """Ensures proper data is retrieved from home of libretexts""" assert home.welcome_text_paragraphs == home_welcome_text_paragraphs + + +def test_get_home_page_content(client: LibreTextsClient, page_tree: LibraryTree): + """Ensures we can get content of root page""" + assert client.get_page_content(page_tree.root).html_body diff --git a/scraper/tests-integration/test_zim_content.py b/scraper/tests-integration/test_zim_content.py index c8018e1..1cfaf12 100644 --- a/scraper/tests-integration/test_zim_content.py +++ b/scraper/tests-integration/test_zim_content.py @@ -56,26 +56,22 @@ def test_zim_content_logo_png(zim_fh: Archive, home_png_size: int): assert len(logo_png.content) == home_png_size # pyright: ignore -def test_zim_content_home_json( - zim_fh: Archive, home_welcome_text_paragraphs: list[str] -): - """Ensure proper content at content/home.json""" - - home_json = zim_fh.get_item("content/home.json") - assert home_json.mimetype == "application/json" # pyright: ignore - assert json.loads(bytes(home_json.content)) == { # pyright: ignore - "welcomeTextParagraphs": home_welcome_text_paragraphs - } - - def test_zim_content_shared_json(zim_fh: Archive): """Ensure proper content at content/shared.json""" shared_json = zim_fh.get_item("content/shared.json") assert shared_json.mimetype == "application/json" # pyright: ignore - assert json.loads(bytes(shared_json.content)) == { # pyright: ignore - "logoPath": "content/logo.png" - } + shared_content = json.loads(bytes(shared_json.content)) # pyright: ignore + shared_content_keys = shared_content.keys() + assert "logoPath" in 
shared_content_keys + assert "rootPagePath" in shared_content_keys + assert "pages" in shared_content_keys + assert len(shared_content["pages"]) == 4 + for page in shared_content["pages"]: + shared_content_page_keys = page.keys() + assert "id" in shared_content_page_keys + assert "title" in shared_content_page_keys + assert "path" in shared_content_page_keys def test_zim_content_config_json(zim_fh: Archive): @@ -86,3 +82,13 @@ def test_zim_content_config_json(zim_fh: Archive): assert json.loads(bytes(config_json.content)) == { # pyright: ignore "secondaryColor": "#FFFFFF" } + + +@pytest.mark.parametrize("page_id", [28207, 28208, 28209, 28212]) +def test_zim_content_page_content_json(page_id: str, zim_fh: Archive): + """Ensure proper content at content/config.json""" + + config_json = zim_fh.get_item(f"content/page_content_{page_id}.json") + assert config_json.mimetype == "application/json" # pyright: ignore + page_content_keys = json.loads(bytes(config_json.content)).keys() # pyright: ignore + assert "htmlBody" in page_content_keys diff --git a/scraper/tests/test_processor.py b/scraper/tests/test_processor.py index 50e6c4f..2f8f389 100644 --- a/scraper/tests/test_processor.py +++ b/scraper/tests/test_processor.py @@ -6,30 +6,42 @@ @pytest.fixture(scope="module") def library_tree() -> LibraryTree: - root = LibraryPage(id="24", title="Home page") - topic1 = LibraryPage(id="25", title="1: First topic", parent=root) + root = LibraryPage(id="24", title="Home page", path="") + topic1 = LibraryPage( + id="25", title="1: First topic", path="1_First_Topic", parent=root + ) root.children.append(topic1) - topic1_1 = LibraryPage(id="26", title="1.1: Cloud", parent=topic1) + topic1_1 = LibraryPage(id="26", title="1.1: Cloud", path="1.1_Cloud", parent=topic1) topic1.children.append(topic1_1) - topic1_2 = LibraryPage(id="27", title="1.2: Tree", parent=topic1) + topic1_2 = LibraryPage(id="27", title="1.2: Tree", path="1.2_Tree", parent=topic1) topic1.children.append(topic1_2) - 
topic1_3 = LibraryPage(id="28", title="1.3: Bees", parent=topic1) + topic1_3 = LibraryPage(id="28", title="1.3: Bees", path="1.3_Bees", parent=topic1) topic1.children.append(topic1_3) - topic2 = LibraryPage(id="29", title="2: Second topic", parent=root) + topic2 = LibraryPage( + id="29", title="2: Second topic", path="2_Second_Topic", parent=root + ) root.children.append(topic2) - topic2_1 = LibraryPage(id="30", title="2.1: Underground", parent=topic2) + topic2_1 = LibraryPage( + id="30", title="2.1: Underground", path="2.1_Underground", parent=topic2 + ) topic2.children.append(topic2_1) - topic2_2 = LibraryPage(id="31", title="2.2: Lava", parent=topic2) + topic2_2 = LibraryPage(id="31", title="2.2: Lava", path="2.2_Lava", parent=topic2) topic2.children.append(topic2_2) - topic2_3 = LibraryPage(id="32", title="2.3: Volcano", parent=topic2) + topic2_3 = LibraryPage( + id="32", title="2.3: Volcano", path="2.3_Volcano", parent=topic2 + ) topic2.children.append(topic2_3) - topic3 = LibraryPage(id="33", title="3: Third topic", parent=root) + topic3 = LibraryPage( + id="33", title="3: Third topic", path="3_Third_Topic", parent=root + ) root.children.append(topic3) - topic3_1 = LibraryPage(id="34", title="3.1: Ground", parent=topic3) + topic3_1 = LibraryPage( + id="34", title="3.1: Ground", path="3.1_Ground", parent=topic3 + ) topic3.children.append(topic3_1) - topic3_2 = LibraryPage(id="35", title="3.2: Earth", parent=topic3) + topic3_2 = LibraryPage(id="35", title="3.2: Earth", path="3.2_Earth", parent=topic3) topic3.children.append(topic3_2) - topic3_3 = LibraryPage(id="36", title="3.3: Sky", parent=topic3) + topic3_3 = LibraryPage(id="36", title="3.3: Sky", path="3.3_Sky", parent=topic3) topic3.children.append(topic3_3) return LibraryTree( root=root, diff --git a/zimui/cypress/e2e/home.cy.ts b/zimui/cypress/e2e/home.cy.ts index d3fb839..60390c3 100644 --- a/zimui/cypress/e2e/home.cy.ts +++ b/zimui/cypress/e2e/home.cy.ts @@ -1,12 +1,14 @@ describe('Home of the ZIM UI', 
() => { beforeEach(() => { - cy.intercept('GET', '/content/home.json', { fixture: 'home.json' }).as('getHome') - cy.intercept('GET', '/content/shared.json', { fixture: 'shared.json' }).as('getShared') cy.intercept('GET', '/content/config.json', { fixture: 'config.json' }).as('getConfig') + cy.intercept('GET', '/content/shared.json', { fixture: 'shared.json' }).as('getShared') + cy.intercept('GET', '/content/page_content_123.json', { fixture: 'page_content_123.json' }).as( + 'getPage' + ) cy.visit('/') - cy.wait('@getHome') - cy.wait('@getShared') cy.wait('@getConfig') + cy.wait('@getShared') + cy.wait('@getPage') }) it('loads the proper header image', () => { @@ -17,8 +19,8 @@ describe('Home of the ZIM UI', () => { }) it('loads the first paragraph only once', () => { - cy.contains('p', 'Paragraph 2').should('be.visible') - cy.get('p:contains("Paragraph 2")').should('have.length', 1) + cy.contains('p', 'Paragraph 1').should('be.visible') + cy.get('p:contains("Paragraph 1")').should('have.length', 1) }) it('loads the second paragraph only once', () => { diff --git a/zimui/cypress/fixtures/home.json b/zimui/cypress/fixtures/home.json deleted file mode 100644 index 98caadb..0000000 --- a/zimui/cypress/fixtures/home.json +++ /dev/null @@ -1 +0,0 @@ -{ "welcomeTextParagraphs": ["Paragraph 1", "Paragraph 2"] } diff --git a/zimui/cypress/fixtures/page_content_123.json b/zimui/cypress/fixtures/page_content_123.json new file mode 100644 index 0000000..1f656a2 --- /dev/null +++ b/zimui/cypress/fixtures/page_content_123.json @@ -0,0 +1,3 @@ +{ + "htmlBody": "

<p>Paragraph 1</p><p>Paragraph 2</p>

" +} diff --git a/zimui/cypress/fixtures/shared.json b/zimui/cypress/fixtures/shared.json index 7674872..f496b3a 100644 --- a/zimui/cypress/fixtures/shared.json +++ b/zimui/cypress/fixtures/shared.json @@ -1 +1,11 @@ -{ "logoPath": "content/logo.png" } +{ + "logoPath": "content/logo.png", + "rootPagePath": "a_folder/a_page", + "pages": [ + { + "id": "123", + "title": "A page title", + "path": "a_folder/a_page" + } + ] +} diff --git a/zimui/public/.gitignore b/zimui/public/.gitignore index e69de29..6b584e8 100644 --- a/zimui/public/.gitignore +++ b/zimui/public/.gitignore @@ -0,0 +1 @@ +content \ No newline at end of file diff --git a/zimui/src/components/__tests__/HeaderBar.spec.ts b/zimui/src/components/__tests__/HeaderBar.spec.ts index e8c9366..1cb0161 100644 --- a/zimui/src/components/__tests__/HeaderBar.spec.ts +++ b/zimui/src/components/__tests__/HeaderBar.spec.ts @@ -33,7 +33,7 @@ describe('HeaderBar', () => { }) const main = useMainStore() const logoPath = 'content/logo.png' - main.shared = { logoPath: logoPath } + main.shared = { logoPath: logoPath, rootPagePath: '', pages: [] } const wrapper = mount(HeaderBar, { global: { diff --git a/zimui/src/router/index.ts b/zimui/src/router/index.ts index 85203d9..492b919 100644 --- a/zimui/src/router/index.ts +++ b/zimui/src/router/index.ts @@ -5,7 +5,7 @@ const router = createRouter({ history: createWebHashHistory(), routes: [ { - path: '/', + path: '/:pathMatch(.*)', name: 'home', component: HomeView } diff --git a/zimui/src/stores/home.ts b/zimui/src/stores/home.ts deleted file mode 100644 index 049bd28..0000000 --- a/zimui/src/stores/home.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { defineStore } from 'pinia' -import axios, { AxiosError } from 'axios' -import type { Home } from '@/types/home' -import { useMainStore } from './main' - -export type RootState = { - home: Home | null -} - -export const useHomeStore = defineStore('home', { - state: () => - ({ - home: null - }) as RootState, - getters: {}, - actions: { - 
async fetchHome() { - const main = useMainStore() - main.isLoading = true - main.errorMessage = '' - main.errorDetails = '' - - return axios.get('./content/home.json').then( - (response) => { - main.isLoading = false - this.home = response.data as Home - }, - (error) => { - main.isLoading = false - this.home = null - main.errorMessage = 'Failed to load home data.' - if (error instanceof AxiosError) { - main.handleAxiosError(error) - } - } - ) - }, - setErrorMessage(message: string) { - const main = useMainStore() - main.errorMessage = message - } - } -}) diff --git a/zimui/src/stores/main.ts b/zimui/src/stores/main.ts index 07fbff2..286de30 100644 --- a/zimui/src/stores/main.ts +++ b/zimui/src/stores/main.ts @@ -1,9 +1,11 @@ import { defineStore } from 'pinia' import axios, { AxiosError } from 'axios' -import type { Shared } from '@/types/shared' +import type { PageContent, Shared, SharedPage } from '@/types/shared' export type RootState = { shared: Shared | null + pagesByPath: { [key: string]: SharedPage } + pageContent: PageContent | null isLoading: boolean errorMessage: string errorDetails: string @@ -13,6 +15,8 @@ export const useMainStore = defineStore('main', { state: () => ({ shared: null, + pagesByPath: {}, + pageContent: null, isLoading: false, errorMessage: '', errorDetails: '' @@ -28,6 +32,10 @@ export const useMainStore = defineStore('main', { (response) => { this.isLoading = false this.shared = response.data as Shared + this.pagesByPath = {} + this.shared.pages.forEach((page: SharedPage) => { + this.pagesByPath[page.path] = page + }) }, (error) => { this.isLoading = false @@ -39,6 +47,26 @@ export const useMainStore = defineStore('main', { } ) }, + async fetchPageContent(page: SharedPage) { + this.isLoading = true + this.errorMessage = '' + this.errorDetails = '' + + return axios.get(`./content/page_content_${page.id}.json`).then( + (response) => { + this.isLoading = false + this.pageContent = response.data as PageContent + }, + (error) => { + 
this.isLoading = false + this.shared = null + this.errorMessage = `Failed to load page content for page ${page.id}` + if (error instanceof AxiosError) { + this.handleAxiosError(error) + } + } + ) + }, checkResponseObject(response: unknown, msg: string = '') { if (response === null || typeof response !== 'object') { if (msg !== '') { diff --git a/zimui/src/types/home.ts b/zimui/src/types/home.ts deleted file mode 100644 index 397466b..0000000 --- a/zimui/src/types/home.ts +++ /dev/null @@ -1,3 +0,0 @@ -export interface Home { - welcomeTextParagraphs: string[] -} diff --git a/zimui/src/types/shared.ts b/zimui/src/types/shared.ts index 4f03566..9607422 100644 --- a/zimui/src/types/shared.ts +++ b/zimui/src/types/shared.ts @@ -1,3 +1,14 @@ +export interface SharedPage { + id: string + path: string + title: string +} export interface Shared { logoPath: string + rootPagePath: string + pages: SharedPage[] +} + +export interface PageContent { + htmlBody: string } diff --git a/zimui/src/views/HomeView.vue b/zimui/src/views/HomeView.vue index 46d2dbf..344cbfe 100644 --- a/zimui/src/views/HomeView.vue +++ b/zimui/src/views/HomeView.vue @@ -1,21 +1,49 @@