From 8cf133e74aea1653e10a130dbc7e1a85b49d129d Mon Sep 17 00:00:00 2001 From: Mat Moore Date: Sun, 26 May 2024 16:41:54 +0100 Subject: [PATCH] Add mechanism to mark and prune subtrees This enables you to trim down the vault after parsing the content, e.g. you can filter out anything without a specific header, or that isn't linked to by another page --- CHANGELOG.md | 11 +++++++++ lib/obsidian/parser/page.rb | 24 ++++++++++++++++++++ spec/obsidian/parser/page_spec.rb | 37 +++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40533e9..4e7c6c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,35 +1,46 @@ ## [Unreleased] + - Added support for wikilinks that embed files. These are rendered as images or links in the HTML content. - Added method `#page.generate_html` to replace `#page.content.generate_html`, and removed the `MarkdownDocument` class. `#page.content` is now a callable that returns the markdown content. +- Added `#mark_referenced` and `#referenced?` to page objects +- Added `#prune!` method to page objects, to remove non-referenced pages ## [0.7.0] - 2023-08-03 + - Fix wikilinks pointing to slugs with spaces not rendering properly. - Links created from wikilinks now include a leading slash ## [0.6.1] - 2023-08-03 + - Prevent `HtmlRenderer` state being shared across documents ## [0.6.0] - 2023-08-03 + - Replace Kramdown with Markly - Enabled support for Github Flavored Markdown tables and tasklists - Rename `MarkdownContent` -> `MarkdownDocument`, `ObsidianFlavoredMarkdown` -> `MarkdownParser` ## [0.5.4] - 2023-08-02 + - Fix page getting clobbered when wikilinks point to non-existent pages. - Expand `[[foo/index]]` wiklinks to `[foo](foo)`. ## [0.5.3] - 2023-08-01 + - Support non-fully qualified titles when parsing wikilink syntax. - Autolink raw URLs. ## [0.5.2] - 2023-07-30 + - Fix handling of `index.md` at the root level. ## [0.5.0] - 2023-07-30 + - Fix ordering of `Page#children` so that index pages come first. - Fix handling of `index.md` documents so that the slug reflects the directory path. ## [0.4.0] - 2023-07-30 + - Unify `Note` and `Index` classes into `Page`. This is a breaking API change. `Parser#notes is replaced by Parse#pages`. Call `Page#is_index?`to distinguish between directory derived pages and documents. - Remove `Parser#table_of_contents` and `Parser#walk_tree`. - Add `Page#find_in_tree` to recursively search for a page with a matching slug. diff --git a/lib/obsidian/parser/page.rb b/lib/obsidian/parser/page.rb index 7560554..9606440 100644 --- a/lib/obsidian/parser/page.rb +++ b/lib/obsidian/parser/page.rb @@ -32,6 +32,7 @@ def initialize(title:, slug:, last_modified: nil, content: nil, parent: nil, con @content_type = content_type @media_root = media_root @source_path = source_path + @referenced = false end def is_index? @@ -163,6 +164,29 @@ def generate_html(markdown_parser: MarkdownParser.new) markdown_parser.parse(content.call, root: root, media_root: media_root).to_html end + def referenced? + @referenced + end + + # Mark the tree containing this page as being "referenced" + # i.e. reachable through links + def mark_referenced + @referenced = true + parent&.mark_referenced + end + + # Remove any child paths that are unreferenced, + # i.e. not reachable through links + def prune! + @children = @children.delete_if do |k, v| + !v.referenced? + end + + @children.values.each do |page| + page.prune! + end + end + attr_reader :title attr_reader :slug attr_reader :last_modified diff --git a/spec/obsidian/parser/page_spec.rb b/spec/obsidian/parser/page_spec.rb index 815afbe..af46892 100644 --- a/spec/obsidian/parser/page_spec.rb +++ b/spec/obsidian/parser/page_spec.rb @@ -99,4 +99,41 @@ expect(root.find_in_tree("foo/index")).to eq(page.parent) end end + + describe "#referenced?" do + it "is false by default" do + expect(root.referenced?).to eq(false) + end + + it "is true after #mark_referenced is called" do + root.mark_referenced + expect(root.referenced?).to eq(true) + end + + it "is true after #mark_referenced is called on a child node" do + page = root.add_page("foo/bar") + page.mark_referenced + expect(page.referenced?).to eq(true) + expect(page.parent.referenced?).to eq(true) + expect(root.referenced?).to eq(true) + end + end + + describe "#prune!" do + it "does not delete referenced pages" do + root.add_page("foo/bar").mark_referenced + root.prune! + expect(root.find_in_tree("foo/bar")).not_to be_nil + end + + it "deletes unreferenced pages" do + root.add_page("foo/bar").mark_referenced + page = root.add_page("foo/baz") + expect(page.referenced?).to eq(false) + root.prune! + expect(root.find_in_tree("foo")).not_to be_nil + expect(root.find_in_tree("foo/bar")).not_to be_nil + expect(root.find_in_tree("foo/baz")).to be_nil + end + end end