Skip to content

Commit

Permalink
Add pages to the ZIM and display them in the UI
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Oct 4, 2024
1 parent 18d81f3 commit 832e7cb
Show file tree
Hide file tree
Showing 20 changed files with 241 additions and 120 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/Tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
docker build -t libretexts2zim .
- name: Run scraper
run: docker run -v $PWD/output:/output libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo"
run: docker run -v $PWD/output:/output libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --root-page-id 28207 --file-name-format "tests_en_libretexts-geo"

- name: Run integration test suite
run: docker run -v $PWD/scraper/tests-integration:/src/scraper/tests-integration -v $PWD/output:/output -e ZIM_FILE_PATH=/output/tests_en_libretexts-geo.zim libretexts2zim bash -c "pip install pytest; pytest -v /src/scraper/tests-integration"
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ See [README](README.md) for details about how to install with hatch virtualenv.

### Developing the ZIM UI in Vue.JS

When you want to alter something in the ZIM UI in Vue.JS, you need assets which are generated by the scraper (e.g. home.json, ...).
When you want to alter something in the ZIM UI in Vue.JS, you need assets which are generated by the scraper (e.g. shared.json, ...).

To simplify this, it is possible to:

Expand All @@ -24,10 +24,10 @@ To achieve this, first build the Docker image based on current code base.
docker build -t local-libretexts2zim .
```

Scrape a library (here we use the [GeoSciences](https://geo.libretexts.org) library, but you could use any other one of interest for your UI developments).
Scrape a library (here we use the [Geosciences](https://geo.libretexts.org) library, but you could use any other one of interest for your UI developments).

```
docker run --rm -it -v "$PWD/output":/output local-libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo"
docker run --rm -it -v "$PWD/output":/output local-libretexts2zim libretexts2zim --library-slug geo --library-name Geosciences --file-name-format "tests_en_libretexts-geo" --overwrite
```

Extract interesting ZIM content and move it to `public` folder.
Expand Down
45 changes: 42 additions & 3 deletions scraper/src/libretexts2zim/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@ class LibraryPage(BaseModel):

id: LibraryPageId
title: str
path: str
parent: "LibraryPage | None" = None
children: list["LibraryPage"] = []

def __repr__(self) -> str:
return (
f"WikiPage(id='{self.id}', title='{self.title}', "
f"WikiPage(id='{self.id}', title='{self.title}', path='{self.path}' "
f"parent='{'None' if not self.parent else self.parent.id}', "
f"children='{','.join([child.id for child in self.children])}')"
)
Expand All @@ -52,6 +53,12 @@ def self_and_parents(self) -> list["LibraryPage"]:
return result


class LibraryPageContent(BaseModel):
    """Content retrieved for a single library page"""

    # HTML of the page body, kept as a plain string
    html_body: str


class LibraryTree(BaseModel):
"""Class holding information about the tree of pages on a given library"""

Expand Down Expand Up @@ -246,14 +253,19 @@ def get_page_tree(self) -> LibraryTree:
)

root = LibraryPage(
id=tree_data["page"]["@id"], title=tree_data["page"]["title"]
id=tree_data["page"]["@id"],
title=tree_data["page"]["title"],
path=tree_data["page"]["path"]["#text"],
)
tree_obj = LibraryTree(root=root)
tree_obj.pages[root.id] = root

def _add_page(page_node: Any, parent: LibraryPage) -> LibraryPage:
page = LibraryPage(
id=page_node["@id"], title=page_node["title"], parent=parent
id=page_node["@id"],
title=page_node["title"],
path=page_node["path"]["#text"],
parent=parent,
)
parent.children.append(page)
tree_obj.pages[page.id] = page
Expand All @@ -274,6 +286,33 @@ def _process_tree_data(page_node: Any, parent: LibraryPage) -> None:

return tree_obj

def get_page_content(self, page: LibraryPage) -> LibraryPageContent:
    """Return the HTML content of a given library page.

    Fetches ``/pages/{page.id}/contents`` through ``self._get_api_json`` and
    validates the shape of its ``body`` property before using it: the first
    element must be a string (the HTML body) and the second one a dict whose
    ``@target`` property is ``"toc"``.

    Args:
        page: page whose content is fetched; only ``page.id`` is used.

    Returns:
        A LibraryPageContent whose ``html_body`` is the first body element.

    Raises:
        LibreTextsParsingError: if ``body`` is missing, is not a sequence of
            at least two elements, or if its elements do not have the
            expected type / ``@target`` value.
    """
    endpoint = f"/pages/{page.id}/contents"
    tree = self._get_api_json(endpoint, timeout=HTTP_TIMEOUT_NORMAL_SECONDS)

    # Guard the container itself first, so that a malformed response is
    # reported as a parsing error rather than a KeyError / IndexError /
    # TypeError raised by the indexing below.
    body = tree.get("body") if isinstance(tree, dict) else None
    if not isinstance(body, (list, tuple)) or len(body) < 2:
        raise LibreTextsParsingError(
            f"Unexpected body of {endpoint}, at least two elements are expected"
        )

    html_body, toc = body[0], body[1]
    if not isinstance(html_body, str):
        raise LibreTextsParsingError(
            f"First body element of {endpoint} is not a string"
        )
    if not isinstance(toc, dict):
        raise LibreTextsParsingError(
            f"Second body element of {endpoint} is not a dict"
        )
    if "@target" not in toc:
        raise LibreTextsParsingError(
            f"Unexpected second body element of {endpoint}, "
            "no @target property"
        )

    # Hoisted in a local so that the f-string below does not need nested
    # double quotes, which are only accepted from Python 3.12 onwards.
    target = toc["@target"]
    if target != "toc":
        raise LibreTextsParsingError(
            f"Unexpected second body element of {endpoint}, "
            f"@target property is '{target}' while only 'toc' "
            "is expected"
        )
    return LibraryPageContent(html_body=html_body)

Check warning on line 314 in scraper/src/libretexts2zim/client.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/libretexts2zim/client.py#L314

Added line #L314 was not covered by tests


def _get_soup(content: str) -> BeautifulSoup:
"""Return a BeautifulSoup soup from textual content
Expand Down
45 changes: 30 additions & 15 deletions scraper/src/libretexts2zim/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@
LibreTextsMetadata,
)
from libretexts2zim.constants import LANGUAGE_ISO_639_3, NAME, ROOT_DIR, VERSION, logger
from libretexts2zim.ui import ConfigModel, HomeModel, SharedModel
from libretexts2zim.ui import (
ConfigModel,
PageContentModel,
PageModel,
SharedModel,
)
from libretexts2zim.zimconfig import ZimConfig


Expand Down Expand Up @@ -260,20 +265,6 @@ def run(self) -> Path:
stream_file(home.welcome_image_url, byte_stream=welcome_image)
add_item_for(creator, "content/logo.png", content=welcome_image.getvalue())
del welcome_image
add_item_for(
creator,
"content/shared.json",
content=SharedModel(logo_path="content/logo.png").model_dump_json(
by_alias=True
),
)
add_item_for(
creator,
"content/home.json",
content=HomeModel(
welcome_text_paragraphs=home.welcome_text_paragraphs
).model_dump_json(by_alias=True),
)

logger.info(f"Adding Vue.JS UI files in {self.zimui_dist}")
for file in self.zimui_dist.rglob("*"):
Expand Down Expand Up @@ -307,5 +298,29 @@ def run(self) -> Path:
f"{len(selected_pages)} pages (out of {len(pages_tree.pages)}) will be "
"fetched and pushed to the ZIM"
)
add_item_for(

Check warning on line 301 in scraper/src/libretexts2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/libretexts2zim/processor.py#L301

Added line #L301 was not covered by tests
creator,
"content/shared.json",
content=SharedModel(
logo_path="content/logo.png",
root_page_path=selected_pages[0].path, # root is always first
pages=[
PageModel(id=page.id, title=page.title, path=page.path)
for page in selected_pages
],
).model_dump_json(by_alias=True),
)

logger.info("Fetching pages content")

Check warning on line 314 in scraper/src/libretexts2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/libretexts2zim/processor.py#L314

Added line #L314 was not covered by tests
for page in selected_pages:
logger.debug(f" Fetching {page.id}")
page_content = self.libretexts_client.get_page_content(page)
add_item_for(

Check warning on line 318 in scraper/src/libretexts2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/libretexts2zim/processor.py#L316-L318

Added lines #L316 - L318 were not covered by tests
creator,
f"content/page_content_{page.id}.json",
content=PageContentModel(
html_body=page_content.html_body
).model_dump_json(by_alias=True),
)

return zim_path
12 changes: 10 additions & 2 deletions scraper/src/libretexts2zim/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,20 @@ class CamelModel(BaseModel):
model_config = ConfigDict(alias_generator=camelize, populate_by_name=True)


class HomeModel(CamelModel):
welcome_text_paragraphs: list[str]
class PageModel(CamelModel):
    """Identity of one page: its id, its title and its path"""

    id: str
    title: str
    path: str


class PageContentModel(CamelModel):
    """Content of one page, holding its HTML body"""

    html_body: str


class SharedModel(CamelModel):
    """Data shared across the UI: logo, root page and list of pages"""

    # path of the logo
    logo_path: str
    # path of the root page
    root_page_path: str
    # one entry per page
    pages: list[PageModel]


class ConfigModel(CamelModel):
Expand Down
5 changes: 5 additions & 0 deletions scraper/tests-integration/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,8 @@ def test_get_home_welcome_text_paragraphs(
"""Ensures proper data is retrieved from home of libretexts"""

assert home.welcome_text_paragraphs == home_welcome_text_paragraphs


def test_get_home_page_content(client: LibreTextsClient, page_tree: LibraryTree):
    """Check that fetching the root page of the tree yields a non-empty HTML body"""
    root_content = client.get_page_content(page_tree.root)
    assert root_content.html_body
36 changes: 21 additions & 15 deletions scraper/tests-integration/test_zim_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,26 +56,22 @@ def test_zim_content_logo_png(zim_fh: Archive, home_png_size: int):
assert len(logo_png.content) == home_png_size # pyright: ignore


def test_zim_content_home_json(
zim_fh: Archive, home_welcome_text_paragraphs: list[str]
):
"""Ensure proper content at content/home.json"""

home_json = zim_fh.get_item("content/home.json")
assert home_json.mimetype == "application/json" # pyright: ignore
assert json.loads(bytes(home_json.content)) == { # pyright: ignore
"welcomeTextParagraphs": home_welcome_text_paragraphs
}


def test_zim_content_shared_json(zim_fh: Archive):
"""Ensure proper content at content/shared.json"""

shared_json = zim_fh.get_item("content/shared.json")
assert shared_json.mimetype == "application/json" # pyright: ignore
assert json.loads(bytes(shared_json.content)) == { # pyright: ignore
"logoPath": "content/logo.png"
}
shared_content = json.loads(bytes(shared_json.content)) # pyright: ignore
shared_content_keys = shared_content.keys()
assert "logoPath" in shared_content_keys
assert "rootPagePath" in shared_content_keys
assert "pages" in shared_content_keys
assert len(shared_content["pages"]) == 4
for page in shared_content["pages"]:
shared_content_page_keys = page.keys()
assert "id" in shared_content_page_keys
assert "title" in shared_content_page_keys
assert "path" in shared_content_page_keys


def test_zim_content_config_json(zim_fh: Archive):
Expand All @@ -86,3 +82,13 @@ def test_zim_content_config_json(zim_fh: Archive):
assert json.loads(bytes(config_json.content)) == { # pyright: ignore
"secondaryColor": "#FFFFFF"
}


@pytest.mark.parametrize("page_id", [28207, 28208, 28209, 28212])
def test_zim_content_page_content_json(page_id: int, zim_fh: Archive):
    """Ensure proper content at content/page_content_<page_id>.json"""

    page_content_json = zim_fh.get_item(f"content/page_content_{page_id}.json")
    assert page_content_json.mimetype == "application/json"  # pyright: ignore
    page_content = json.loads(bytes(page_content_json.content))  # pyright: ignore
    # Only the presence of the key is checked, not the HTML itself
    assert "htmlBody" in page_content
38 changes: 25 additions & 13 deletions scraper/tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,42 @@

@pytest.fixture(scope="module")
def library_tree() -> LibraryTree:
root = LibraryPage(id="24", title="Home page")
topic1 = LibraryPage(id="25", title="1: First topic", parent=root)
root = LibraryPage(id="24", title="Home page", path="")
topic1 = LibraryPage(
id="25", title="1: First topic", path="1_First_Topic", parent=root
)
root.children.append(topic1)
topic1_1 = LibraryPage(id="26", title="1.1: Cloud", parent=topic1)
topic1_1 = LibraryPage(id="26", title="1.1: Cloud", path="1.1_Cloud", parent=topic1)
topic1.children.append(topic1_1)
topic1_2 = LibraryPage(id="27", title="1.2: Tree", parent=topic1)
topic1_2 = LibraryPage(id="27", title="1.2: Tree", path="1.2_Tree", parent=topic1)
topic1.children.append(topic1_2)
topic1_3 = LibraryPage(id="28", title="1.3: Bees", parent=topic1)
topic1_3 = LibraryPage(id="28", title="1.3: Bees", path="1.3_Bees", parent=topic1)
topic1.children.append(topic1_3)
topic2 = LibraryPage(id="29", title="2: Second topic", parent=root)
topic2 = LibraryPage(
id="29", title="2: Second topic", path="2_Second_Topic", parent=root
)
root.children.append(topic2)
topic2_1 = LibraryPage(id="30", title="2.1: Underground", parent=topic2)
topic2_1 = LibraryPage(
id="30", title="2.1: Underground", path="2.1_Underground", parent=topic2
)
topic2.children.append(topic2_1)
topic2_2 = LibraryPage(id="31", title="2.2: Lava", parent=topic2)
topic2_2 = LibraryPage(id="31", title="2.2: Lava", path="2.2_Lava", parent=topic2)
topic2.children.append(topic2_2)
topic2_3 = LibraryPage(id="32", title="2.3: Volcano", parent=topic2)
topic2_3 = LibraryPage(
id="32", title="2.3: Volcano", path="2.3_Volcano", parent=topic2
)
topic2.children.append(topic2_3)
topic3 = LibraryPage(id="33", title="3: Third topic", parent=root)
topic3 = LibraryPage(
id="33", title="3: Third topic", path="3_Third_Topic", parent=root
)
root.children.append(topic3)
topic3_1 = LibraryPage(id="34", title="3.1: Ground", parent=topic3)
topic3_1 = LibraryPage(
id="34", title="3.1: Ground", path="3.1_Ground", parent=topic3
)
topic3.children.append(topic3_1)
topic3_2 = LibraryPage(id="35", title="3.2: Earth", parent=topic3)
topic3_2 = LibraryPage(id="35", title="3.2: Earth", path="3.2_Earth", parent=topic3)
topic3.children.append(topic3_2)
topic3_3 = LibraryPage(id="36", title="3.3: Sky", parent=topic3)
topic3_3 = LibraryPage(id="36", title="3.3: Sky", path="3.3_Sky", parent=topic3)
topic3.children.append(topic3_3)
return LibraryTree(
root=root,
Expand Down
14 changes: 8 additions & 6 deletions zimui/cypress/e2e/home.cy.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
describe('Home of the ZIM UI', () => {
beforeEach(() => {
cy.intercept('GET', '/content/home.json', { fixture: 'home.json' }).as('getHome')
cy.intercept('GET', '/content/shared.json', { fixture: 'shared.json' }).as('getShared')
cy.intercept('GET', '/content/config.json', { fixture: 'config.json' }).as('getConfig')
cy.intercept('GET', '/content/shared.json', { fixture: 'shared.json' }).as('getShared')
cy.intercept('GET', '/content/page_content_123.json', { fixture: 'page_content_123.json' }).as(
'getPage'
)
cy.visit('/')
cy.wait('@getHome')
cy.wait('@getShared')
cy.wait('@getConfig')
cy.wait('@getShared')
cy.wait('@getPage')
})

it('loads the proper header image', () => {
Expand All @@ -17,8 +19,8 @@ describe('Home of the ZIM UI', () => {
})

it('loads the first paragraph only once', () => {
cy.contains('p', 'Paragraph 2').should('be.visible')
cy.get('p:contains("Paragraph 2")').should('have.length', 1)
cy.contains('p', 'Paragraph 1').should('be.visible')
cy.get('p:contains("Paragraph 1")').should('have.length', 1)
})

it('loads the second paragraph only once', () => {
Expand Down
1 change: 0 additions & 1 deletion zimui/cypress/fixtures/home.json

This file was deleted.

3 changes: 3 additions & 0 deletions zimui/cypress/fixtures/page_content_123.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"htmlBody": "<p>Paragraph 1</p><p>Paragraph 2</p>"
}
12 changes: 11 additions & 1 deletion zimui/cypress/fixtures/shared.json
Original file line number Diff line number Diff line change
@@ -1 +1,11 @@
{ "logoPath": "content/logo.png" }
{
"logoPath": "content/logo.png",
"rootPagePath": "a_folder/a_page",
"pages": [
{
"id": "123",
"title": "A page title",
"path": "a_folder/a_page"
}
]
}
1 change: 1 addition & 0 deletions zimui/public/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
content
2 changes: 1 addition & 1 deletion zimui/src/components/__tests__/HeaderBar.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ describe('HeaderBar', () => {
})
const main = useMainStore()
const logoPath = 'content/logo.png'
main.shared = { logoPath: logoPath }
main.shared = { logoPath: logoPath, rootPagePath: '', pages: [] }

const wrapper = mount(HeaderBar, {
global: {
Expand Down
2 changes: 1 addition & 1 deletion zimui/src/router/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const router = createRouter({
history: createWebHashHistory(),
routes: [
{
path: '/',
path: '/:pathMatch(.*)',
name: 'home',
component: HomeView
}
Expand Down
Loading

0 comments on commit 832e7cb

Please sign in to comment.