Skip to content

Commit

Permalink
Merge pull request #28 from openzim/add_pages_to_zim
Browse files Browse the repository at this point in the history
Add pages to the ZIM and display them in the UI
  • Loading branch information
benoit74 authored Oct 8, 2024
2 parents 18d81f3 + c3b6266 commit 733c35a
Show file tree
Hide file tree
Showing 20 changed files with 243 additions and 121 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/Tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
docker build -t libretexts2zim .
- name: Run scraper
run: docker run -v $PWD/output:/output libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo"
run: docker run -v $PWD/output:/output libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --root-page-id 28207 --file-name-format "tests_en_libretexts-geo"

- name: Run integration test suite
run: docker run -v $PWD/scraper/tests-integration:/src/scraper/tests-integration -v $PWD/output:/output -e ZIM_FILE_PATH=/output/tests_en_libretexts-geo.zim libretexts2zim bash -c "pip install pytest; pytest -v /src/scraper/tests-integration"
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ See [README](README.md) for details about how to install with hatch virtualenv.

### Developing the ZIM UI in Vue.JS

When you want to alter something in the ZIM UI in Vue.JS, you need assets which are generated by the scraper (e.g. home.json, ...).
When you want to alter something in the ZIM UI in Vue.JS, you need assets which are generated by the scraper (e.g. shared.json, ...).

To simplify this, it is possible to:

Expand All @@ -24,10 +24,10 @@ To achieve this, first build the Docker image based on current code base.
docker build -t local-libretexts2zim .
```

Scrape a library (here we use the [GeoSciences](https://geo.libretexts.org) library, but you could use any other one of interest for your UI developments).
Scrape a library (here we use the [Geosciences](https://geo.libretexts.org) library, but you could use any other one of interest for your UI developments).

```
docker run --rm -it -v "$PWD/output":/output local-libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo"
docker run --rm -it -v "$PWD/output":/output local-libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo" --overwrite
```

Extract interesting ZIM content and move it to `public` folder.
Expand Down
45 changes: 42 additions & 3 deletions scraper/src/libretexts2zim/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@ class LibraryPage(BaseModel):

id: LibraryPageId
title: str
path: str
parent: "LibraryPage | None" = None
children: list["LibraryPage"] = []

def __repr__(self) -> str:
    """Return a debug representation of the page.

    Parent and children are represented by their id only, not by their own
    full representation.
    """
    parent_id = self.parent.id if self.parent else "None"
    children_ids = ",".join(child.id for child in self.children)
    # Use the real class name so the repr cannot drift from the class again
    return (
        f"{type(self).__name__}(id='{self.id}', title='{self.title}', "
        f"path='{self.path}', parent='{parent_id}', "
        f"children='{children_ids}')"
    )
Expand All @@ -52,6 +53,12 @@ def self_and_parents(self) -> list["LibraryPage"]:
return result


class LibraryPageContent(BaseModel):
"""Content of a given library page"""

html_body: str


class LibraryTree(BaseModel):
"""Class holding information about the tree of pages on a given library"""

Expand Down Expand Up @@ -246,14 +253,19 @@ def get_page_tree(self) -> LibraryTree:
)

root = LibraryPage(
id=tree_data["page"]["@id"], title=tree_data["page"]["title"]
id=tree_data["page"]["@id"],
title=tree_data["page"]["title"],
path=tree_data["page"]["path"]["#text"],
)
tree_obj = LibraryTree(root=root)
tree_obj.pages[root.id] = root

def _add_page(page_node: Any, parent: LibraryPage) -> LibraryPage:
page = LibraryPage(
id=page_node["@id"], title=page_node["title"], parent=parent
id=page_node["@id"],
title=page_node["title"],
path=page_node["path"]["#text"],
parent=parent,
)
parent.children.append(page)
tree_obj.pages[page.id] = page
Expand All @@ -274,6 +286,33 @@ def _process_tree_data(page_node: Any, parent: LibraryPage) -> None:

return tree_obj

def get_page_content(self, page: LibraryPage) -> LibraryPageContent:
    """Return the content of a given page.

    The `/pages/{id}/contents` API response is expected to hold a `body` list
    whose first element is a string (used as the page HTML) and whose second
    element is a dict with an `@target` property equal to `toc`.

    Args:
        page: page whose content must be fetched; only its `id` is used.

    Returns:
        A LibraryPageContent whose `html_body` is the first body element.

    Raises:
        LibreTextsParsingError: when the response does not have the expected
            shape.
    """
    endpoint = f"/pages/{page.id}/contents"
    tree = self._get_api_json(endpoint, timeout=HTTP_TIMEOUT_NORMAL_SECONDS)

    # Validate the container first so that a malformed response is reported as a
    # parsing error rather than a bare KeyError / IndexError / TypeError
    body = tree.get("body") if isinstance(tree, dict) else None
    if not isinstance(body, list) or len(body) < 2:  # noqa: PLR2004
        raise LibreTextsParsingError(
            f"Body of {endpoint} is not a list of at least two elements"
        )

    html_body, toc_node = body[0], body[1]
    if not isinstance(html_body, str):
        raise LibreTextsParsingError(
            f"First body element of {endpoint} is not a string"
        )
    if not isinstance(toc_node, dict):
        raise LibreTextsParsingError(
            f"Second body element of {endpoint} is not a dict"
        )
    if "@target" not in toc_node:
        raise LibreTextsParsingError(
            f"Unexpected second body element of {endpoint}, "
            "no @target property"
        )
    # Local variable avoids nesting double quotes inside an f-string, which only
    # parses on Python 3.12+
    target = toc_node["@target"]
    if target != "toc":
        raise LibreTextsParsingError(
            f"Unexpected second body element of {endpoint}, "
            f"@target property is '{target}' while only 'toc' "
            "is expected"
        )
    return LibraryPageContent(html_body=html_body)


def _get_soup(content: str) -> BeautifulSoup:
"""Return a BeautifulSoup soup from textual content
Expand Down
48 changes: 32 additions & 16 deletions scraper/src/libretexts2zim/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@
LibreTextsMetadata,
)
from libretexts2zim.constants import LANGUAGE_ISO_639_3, NAME, ROOT_DIR, VERSION, logger
from libretexts2zim.ui import ConfigModel, HomeModel, SharedModel
from libretexts2zim.ui import (
ConfigModel,
PageContentModel,
PageModel,
SharedModel,
)
from libretexts2zim.zimconfig import ZimConfig


Expand Down Expand Up @@ -260,20 +265,6 @@ def run(self) -> Path:
stream_file(home.welcome_image_url, byte_stream=welcome_image)
add_item_for(creator, "content/logo.png", content=welcome_image.getvalue())
del welcome_image
add_item_for(
creator,
"content/shared.json",
content=SharedModel(logo_path="content/logo.png").model_dump_json(
by_alias=True
),
)
add_item_for(
creator,
"content/home.json",
content=HomeModel(
welcome_text_paragraphs=home.welcome_text_paragraphs
).model_dump_json(by_alias=True),
)

logger.info(f"Adding Vue.JS UI files in {self.zimui_dist}")
for file in self.zimui_dist.rglob("*"):
Expand All @@ -287,7 +278,8 @@ def run(self) -> Path:
creator=creator,
path=path,
content=index_html_path.read_text(encoding="utf-8").replace(
"<title>Vite App</title>", formatted_config.title_format
"<title>Vite App</title>",
f"<title>{formatted_config.title_format}</title>",
),
mimetype="text/html",
is_front=True,
Expand All @@ -307,5 +299,29 @@ def run(self) -> Path:
f"{len(selected_pages)} pages (out of {len(pages_tree.pages)}) will be "
"fetched and pushed to the ZIM"
)
add_item_for(
creator,
"content/shared.json",
content=SharedModel(
logo_path="content/logo.png",
root_page_path=selected_pages[0].path, # root is always first
pages=[
PageModel(id=page.id, title=page.title, path=page.path)
for page in selected_pages
],
).model_dump_json(by_alias=True),
)

logger.info("Fetching pages content")
for page in selected_pages:
logger.debug(f" Fetching {page.id}")
page_content = self.libretexts_client.get_page_content(page)
add_item_for(
creator,
f"content/page_content_{page.id}.json",
content=PageContentModel(
html_body=page_content.html_body
).model_dump_json(by_alias=True),
)

return zim_path
12 changes: 10 additions & 2 deletions scraper/src/libretexts2zim/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,20 @@ class CamelModel(BaseModel):
model_config = ConfigDict(alias_generator=camelize, populate_by_name=True)


class HomeModel(CamelModel):
welcome_text_paragraphs: list[str]
class PageModel(CamelModel):
id: str
title: str
path: str


class PageContentModel(CamelModel):
html_body: str


class SharedModel(CamelModel):
logo_path: str
root_page_path: str
pages: list[PageModel]


class ConfigModel(CamelModel):
Expand Down
5 changes: 5 additions & 0 deletions scraper/tests-integration/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,8 @@ def test_get_home_welcome_text_paragraphs(
"""Ensures proper data is retrieved from home of libretexts"""

assert home.welcome_text_paragraphs == home_welcome_text_paragraphs


def test_get_home_page_content(client: LibreTextsClient, page_tree: LibraryTree):
"""Ensures we can get content of root page"""
assert client.get_page_content(page_tree.root).html_body
36 changes: 21 additions & 15 deletions scraper/tests-integration/test_zim_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,26 +56,22 @@ def test_zim_content_logo_png(zim_fh: Archive, home_png_size: int):
assert len(logo_png.content) == home_png_size # pyright: ignore


def test_zim_content_home_json(
zim_fh: Archive, home_welcome_text_paragraphs: list[str]
):
"""Ensure proper content at content/home.json"""

home_json = zim_fh.get_item("content/home.json")
assert home_json.mimetype == "application/json" # pyright: ignore
assert json.loads(bytes(home_json.content)) == { # pyright: ignore
"welcomeTextParagraphs": home_welcome_text_paragraphs
}


def test_zim_content_shared_json(zim_fh: Archive):
"""Ensure proper content at content/shared.json"""

shared_json = zim_fh.get_item("content/shared.json")
assert shared_json.mimetype == "application/json" # pyright: ignore
assert json.loads(bytes(shared_json.content)) == { # pyright: ignore
"logoPath": "content/logo.png"
}
shared_content = json.loads(bytes(shared_json.content)) # pyright: ignore
shared_content_keys = shared_content.keys()
assert "logoPath" in shared_content_keys
assert "rootPagePath" in shared_content_keys
assert "pages" in shared_content_keys
assert len(shared_content["pages"]) == 4
for page in shared_content["pages"]:
shared_content_page_keys = page.keys()
assert "id" in shared_content_page_keys
assert "title" in shared_content_page_keys
assert "path" in shared_content_page_keys


def test_zim_content_config_json(zim_fh: Archive):
Expand All @@ -86,3 +82,13 @@ def test_zim_content_config_json(zim_fh: Archive):
assert json.loads(bytes(config_json.content)) == { # pyright: ignore
"secondaryColor": "#FFFFFF"
}


@pytest.mark.parametrize("page_id", [28207, 28208, 28209, 28212])
def test_zim_content_page_content_json(page_id: int, zim_fh: Archive):
    """Ensure proper content at content/page_content_<page_id>.json"""

    page_content_json = zim_fh.get_item(f"content/page_content_{page_id}.json")
    assert page_content_json.mimetype == "application/json"  # pyright: ignore
    page_content = json.loads(bytes(page_content_json.content))  # pyright: ignore
    assert "htmlBody" in page_content
38 changes: 25 additions & 13 deletions scraper/tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,42 @@

@pytest.fixture(scope="module")
def library_tree() -> LibraryTree:
root = LibraryPage(id="24", title="Home page")
topic1 = LibraryPage(id="25", title="1: First topic", parent=root)
root = LibraryPage(id="24", title="Home page", path="")
topic1 = LibraryPage(
id="25", title="1: First topic", path="1_First_Topic", parent=root
)
root.children.append(topic1)
topic1_1 = LibraryPage(id="26", title="1.1: Cloud", parent=topic1)
topic1_1 = LibraryPage(id="26", title="1.1: Cloud", path="1.1_Cloud", parent=topic1)
topic1.children.append(topic1_1)
topic1_2 = LibraryPage(id="27", title="1.2: Tree", parent=topic1)
topic1_2 = LibraryPage(id="27", title="1.2: Tree", path="1.2_Tree", parent=topic1)
topic1.children.append(topic1_2)
topic1_3 = LibraryPage(id="28", title="1.3: Bees", parent=topic1)
topic1_3 = LibraryPage(id="28", title="1.3: Bees", path="1.3_Bees", parent=topic1)
topic1.children.append(topic1_3)
topic2 = LibraryPage(id="29", title="2: Second topic", parent=root)
topic2 = LibraryPage(
id="29", title="2: Second topic", path="2_Second_Topic", parent=root
)
root.children.append(topic2)
topic2_1 = LibraryPage(id="30", title="2.1: Underground", parent=topic2)
topic2_1 = LibraryPage(
id="30", title="2.1: Underground", path="2.1_Underground", parent=topic2
)
topic2.children.append(topic2_1)
topic2_2 = LibraryPage(id="31", title="2.2: Lava", parent=topic2)
topic2_2 = LibraryPage(id="31", title="2.2: Lava", path="2.2_Lava", parent=topic2)
topic2.children.append(topic2_2)
topic2_3 = LibraryPage(id="32", title="2.3: Volcano", parent=topic2)
topic2_3 = LibraryPage(
id="32", title="2.3: Volcano", path="2.3_Volcano", parent=topic2
)
topic2.children.append(topic2_3)
topic3 = LibraryPage(id="33", title="3: Third topic", parent=root)
topic3 = LibraryPage(
id="33", title="3: Third topic", path="3_Third_Topic", parent=root
)
root.children.append(topic3)
topic3_1 = LibraryPage(id="34", title="3.1: Ground", parent=topic3)
topic3_1 = LibraryPage(
id="34", title="3.1: Ground", path="3.1_Ground", parent=topic3
)
topic3.children.append(topic3_1)
topic3_2 = LibraryPage(id="35", title="3.2: Earth", parent=topic3)
topic3_2 = LibraryPage(id="35", title="3.2: Earth", path="3.2_Earth", parent=topic3)
topic3.children.append(topic3_2)
topic3_3 = LibraryPage(id="36", title="3.3: Sky", parent=topic3)
topic3_3 = LibraryPage(id="36", title="3.3: Sky", path="3.3_Sky", parent=topic3)
topic3.children.append(topic3_3)
return LibraryTree(
root=root,
Expand Down
14 changes: 8 additions & 6 deletions zimui/cypress/e2e/home.cy.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
describe('Home of the ZIM UI', () => {
beforeEach(() => {
cy.intercept('GET', '/content/home.json', { fixture: 'home.json' }).as('getHome')
cy.intercept('GET', '/content/shared.json', { fixture: 'shared.json' }).as('getShared')
cy.intercept('GET', '/content/config.json', { fixture: 'config.json' }).as('getConfig')
cy.intercept('GET', '/content/shared.json', { fixture: 'shared.json' }).as('getShared')
cy.intercept('GET', '/content/page_content_123.json', { fixture: 'page_content_123.json' }).as(
'getPage'
)
cy.visit('/')
cy.wait('@getHome')
cy.wait('@getShared')
cy.wait('@getConfig')
cy.wait('@getShared')
cy.wait('@getPage')
})

it('loads the proper header image', () => {
Expand All @@ -17,8 +19,8 @@ describe('Home of the ZIM UI', () => {
})

it('loads the first paragraph only once', () => {
cy.contains('p', 'Paragraph 2').should('be.visible')
cy.get('p:contains("Paragraph 2")').should('have.length', 1)
cy.contains('p', 'Paragraph 1').should('be.visible')
cy.get('p:contains("Paragraph 1")').should('have.length', 1)
})

it('loads the second paragraph only once', () => {
Expand Down
1 change: 0 additions & 1 deletion zimui/cypress/fixtures/home.json

This file was deleted.

3 changes: 3 additions & 0 deletions zimui/cypress/fixtures/page_content_123.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"htmlBody": "<p>Paragraph 1</p><p>Paragraph 2</p>"
}
12 changes: 11 additions & 1 deletion zimui/cypress/fixtures/shared.json
Original file line number Diff line number Diff line change
@@ -1 +1,11 @@
{ "logoPath": "content/logo.png" }
{
"logoPath": "content/logo.png",
"rootPagePath": "a_folder/a_page",
"pages": [
{
"id": "123",
"title": "A page title",
"path": "a_folder/a_page"
}
]
}
1 change: 1 addition & 0 deletions zimui/public/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
content
2 changes: 1 addition & 1 deletion zimui/src/components/__tests__/HeaderBar.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ describe('HeaderBar', () => {
})
const main = useMainStore()
const logoPath = 'content/logo.png'
main.shared = { logoPath: logoPath }
main.shared = { logoPath: logoPath, rootPagePath: '', pages: [] }

const wrapper = mount(HeaderBar, {
global: {
Expand Down
2 changes: 1 addition & 1 deletion zimui/src/router/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const router = createRouter({
history: createWebHashHistory(),
routes: [
{
path: '/',
path: '/:pathMatch(.*)',
name: 'home',
component: HomeView
}
Expand Down
Loading

0 comments on commit 733c35a

Please sign in to comment.