Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP canonical URL rewriting functionality #78

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# DocumenterTools.jl changelog

## Unreleased

* ![Enhancement][badge-enhancement] DocumenterTools now provides a `DocumenterTools.walkdocs` function. ([#75][github-75])

## Version `v0.1.17`

* ![Enhancement][badge-enhancement] The compiled CSS files generated by DocumenterTools are now minified. ([#71][github-71])
Expand Down Expand Up @@ -102,6 +106,7 @@ Maintenance release declaring compatibility with Documenter 0.25. ([#39][github-
[github-64]: https://github.com/JuliaDocs/DocumenterTools.jl/issues/64
[github-65]: https://github.com/JuliaDocs/DocumenterTools.jl/pull/65
[github-71]: https://github.com/JuliaDocs/DocumenterTools.jl/pull/71
[github-75]: https://github.com/JuliaDocs/DocumenterTools.jl/pull/75


[badge-breaking]: https://img.shields.io/badge/BREAKING-red.svg
Expand Down
4 changes: 4 additions & 0 deletions src/DocumenterTools.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
module DocumenterTools

import AbstractTrees
import Gumbo
using DocStringExtensions

"""
Expand Down Expand Up @@ -174,6 +176,8 @@ function package_devpath(pkg::Module)
return normpath(joinpath(path, "..", ".."))
end

include("walkdocs.jl")
include("canonical_links.jl")
include("genkeys.jl")
include("Generator.jl")
include("Themes.jl")
Expand Down
25 changes: 12 additions & 13 deletions src/OutdatedWarning.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
export OutdatedWarning

module OutdatedWarning
import ..DocumenterTools
using Gumbo, AbstractTrees, Documenter

OLD_VERSION_CSS = replace("""
Expand Down Expand Up @@ -168,20 +169,18 @@
end

print(io, "Processing $(dir): ")
for (root, _, files) in walkdir(path)
for file in files
_, ext = splitext(file)
if ext == ".html"
try
did_change = add_old_docs_notice(joinpath(root, file), force)
print(io, did_change ? "✓" : ".")
catch err
if err isa InterruptException
rethrow()
end
@debug "Fatally failed to add a outdated warning" exception = (err, catch_backtrace())
print(io, "!")
DocumenterTools.walkdocs(path) do fileinfo
_, ext = splitext(fileinfo.filename)
if ext == ".html"
try
did_change = add_old_docs_notice(fileinfo.fullpath, force)
print(io, did_change ? "✓" : ".")
catch err
if err isa InterruptException
rethrow()

Check warning on line 180 in src/OutdatedWarning.jl

View check run for this annotation

Codecov / codecov/patch

src/OutdatedWarning.jl#L179-L180

Added lines #L179 - L180 were not covered by tests
end
@debug "Fatally failed to add a outdated warning" exception = (err, catch_backtrace())
print(io, "!")

Check warning on line 183 in src/OutdatedWarning.jl

View check run for this annotation

Codecov / codecov/patch

src/OutdatedWarning.jl#L182-L183

Added lines #L182 - L183 were not covered by tests
end
end
end
Expand Down
180 changes: 180 additions & 0 deletions src/canonical_links.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
"""
    DocumenterTools.update_canonical_links_for_version(
        docs_directory::AbstractString;
        canonical::AbstractString,
    )

Rewrite the `<link rel="canonical">` element of every HTML file found under
`docs_directory` so that it points below the `canonical` root.

For each HTML file the expected canonical URL is `canonical` joined (with `/`) with the
components of the file's path relative to `docs_directory`. For files named `index.html`
the file name is dropped and a trailing `/` is appended instead.

- Every existing canonical `<link>` whose `href` differs from the expected URL is updated.
- If a file has no canonical `<link>` at all, one is appended to the first `<head>`
  element encountered.
- A file is written back to disk only if its DOM was modified.
- An error is logged (but nothing is thrown) if a file has more than one canonical tag.

# Keywords

- **`canonical`**: corresponds to the `canonical` attribute of `Documenter.HTML`,
  specifying the root of the canonical URL. Trailing `/` characters are stripped.
"""
function update_canonical_links_for_version(
    docs_directory::AbstractString;
    canonical::AbstractString
)
    canonical_root = rstrip(canonical, '/')

    walkdocs(docs_directory) do fileinfo
        @debug "update_canonical_links: checking $(fileinfo.relpath)"

        # Work out the canonical URL this file is expected to advertise.
        parts = splitpath(fileinfo.relpath)
        new_canonical_href = if last(parts) == "index.html"
            joinurl(canonical_root, parts[1:end-1]...) * '/'
        else
            joinurl(canonical_root, parts...)
        end

        html = Gumbo.parsehtml(read(fileinfo.fullpath, String))
        n_canonical_tags = 0
        dom_updated = false

        # First pass: fix up every canonical <link> that is already in the document.
        for e in AbstractTrees.PreOrderDFS(html.root)
            if is_canonical_element(e)
                n_canonical_tags += 1
                canonical_href = Gumbo.getattr(e, "href", nothing)
                canonical_href == new_canonical_href && continue
                Gumbo.setattr!(e, "href", new_canonical_href)
                @warn "canonical_href updated" canonical_href new_canonical_href fileinfo.relpath
                dom_updated = true
            end
        end

        # Second pass (only when no canonical <link> exists): append one to the first <head>.
        if n_canonical_tags == 0
            for e in AbstractTrees.PreOrderDFS(html.root)
                (e isa Gumbo.HTMLElement && Gumbo.tag(e) == :head) || continue
                canonical_href_element = Gumbo.HTMLElement{:link}(
                    [], e, Dict(
                        "rel" => "canonical",
                        "href" => new_canonical_href,
                    )
                )
                push!(e.children, canonical_href_element)
                @warn "Added new canonical_href" new_canonical_href fileinfo.relpath
                dom_updated = true
                break
            end
        end

        # Only touch the file on disk when something actually changed.
        if dom_updated
            open(fileinfo.fullpath, "w") do io
                print(io, html)
            end
        end
        if n_canonical_tags > 1
            @error "Multiple canonical tags!" file = fileinfo.relpath
        end
        return nothing
    end
end

# Return `true` if `e` is a Gumbo `<link>` element whose `rel` attribute is "canonical".
function is_canonical_element(e)
    e isa Gumbo.HTMLElement || return false
    Gumbo.tag(e) == :link || return false
    return Gumbo.getattr(e, "rel", nothing) == "canonical"
end
# Join URL components with a single `/` between consecutive components.
# No normalization is done: slashes already present in the components are kept as-is.
function joinurl(ps::AbstractString...)
    return join(ps, '/')
end

Check warning on line 65 in src/canonical_links.jl

View check run for this annotation

Codecov / codecov/patch

src/canonical_links.jl#L64-L65

Added lines #L64 - L65 were not covered by tests

"""
    DocumenterTools.update_canonical_links(
        docs_directory::AbstractString;
        canonical::AbstractString,
    )

Update the canonical links of all the Documenter builds stored directly under
`docs_directory` (a directory that contains a `versions.js` file and one subdirectory
per deployed version).

The canonical version is determined from the entries of `versions.js`:

1. if exactly one entry is a symlink whose name is _not_ a version number
   (e.g. `stable`), that entry is used;
2. if there is no such entry, the symlink named like a major version (`v1`, `v2`, ... or
   a bare number) with the highest number is used.

Every real (non-symlink) subdirectory of `docs_directory` that contains a `siteinfo.js`
file is then passed to `update_canonical_links_for_version`, with the canonical root set
to `canonical` joined with the name of the canonical version.

# Keywords

- **`canonical`**: root of the canonical URL (trailing `/` characters are stripped).

# Throws

- `ArgumentError` if `docs_directory` is not a directory or has no `versions.js`.
- `ErrorException` if the canonical version can not be determined (no candidates, or
  several non-version symlinks).
"""
function update_canonical_links(
    docs_directory::AbstractString;
    canonical::AbstractString
)
    canonical = rstrip(canonical, '/')
    docs_directory = abspath(docs_directory)
    isdir(docs_directory) || throw(ArgumentError("No such directory: $(docs_directory)"))

    # Try to extract the list of versions from versions.js
    versions_js = joinpath(docs_directory, "versions.js")
    isfile(versions_js) || throw(ArgumentError("versions.js is missing in $(docs_directory)"))
    versions = map(extract_versions_list(versions_js)) do version_str
        isversion, version_number = if occursin(Base.VERSION_REGEX, version_str)
            true, VersionNumber(version_str)
        else
            false, nothing
        end
        fullpath = joinpath(docs_directory, version_str)
        return (;
            path = version_str,
            path_exists = isdir(fullpath) || islink(fullpath),
            symlink = islink(fullpath),
            isversion,
            version_number,
            fullpath,
        )
    end
    # We'll filter out a couple of potential bad cases and issue warnings. Note that this
    # must be the mutating `filter!`: a plain `filter` would only log the warnings and then
    # throw the filtered list away.
    filter!(versions) do vi
        if !vi.path_exists
            @warn "update_canonical_links: path does not exists or is not a directory" docs_directory vi
            return false
        end
        return true
    end
    # We need to determine the canonical path. This would usually be something like the
    # stable/ directory, but it can have a different name, including being a version number.
    # So first we try to find a non-version directory _that is a symlink_ (so that it
    # wouldn't get confused with previews/ or dev builds). If that fails, we try to find the
    # symlinked directory matching `v[0-9]+` with the highest version number. This does not
    # cover all possible cases, but should be good enough for now.
    #
    # TODO: we could also try to parse the canonical URL from the index.html, and only fall
    # back to versions.js when the canonical URL is not present.
    non_version_symlinks = filter(vi -> !vi.isversion && vi.symlink, versions)
    canonical_version = if isempty(non_version_symlinks)
        # We didn't find any non-version symlinks, so we'll try to find the vN directory now
        # as a fallback.
        version_symlinks = map(versions) do vi
            if !(vi.symlink && vi.isversion)
                return nothing
            end
            # `vi.path` is the raw entry from versions.js, so major-version directories
            # normally carry a `v` prefix (`v1`); a bare number (`1`) is accepted as well.
            m = match(r"^v?([0-9]+)$", vi.path)
            isnothing(m) && return nothing
            return parse(Int, m[1]) => vi
        end
        filter!(!isnothing, version_symlinks)
        if isempty(version_symlinks)
            error("Unable to determine the canonical path. Found no version directories")
        end
        _, idx = findmax(first, version_symlinks)
        version_symlinks[idx][2]
    elseif length(non_version_symlinks) > 1
        error("Unable to determine the canonical path. Found multiple non-version symlinks.\n$(non_version_symlinks)")
    else
        only(non_version_symlinks)
    end
    canonical_full_root = joinurl(canonical, canonical_version.path)
    # If we have determined which version should be the canonical version, we can actually
    # go and run update_canonical_links_for_version on each directory.
    for filename in readdir(docs_directory)
        path = joinpath(docs_directory, filename)
        # We'll skip all files. This includes files such as index.html, which in this
        # directory will likely be the redirect. Also, links should be pointing to other
        # versions, so we'll skip them too.
        if islink(path) || !isdir(path)
            continue
        end
        # For true directories, we check that siteinfo.js file is present, which is a pretty
        # good indicator that it's a proper Documenter build.
        if !isfile(joinpath(path, "siteinfo.js"))
            # We want to warn if we run across any directories that are not Documenter builds.
            # But previews/ is one valid case which may be present and so we shouldn't warn
            # for this one.
            if filename != "previews"
                @warn "update_canonical_links: skipping directory that does not look like a Documenter build" filename docs_directory
            end
            continue
        end
        # Finally, we can run update_canonical_links_for_version on the directory.
        @info "Updating canonical URLs for" docs_directory filename canonical_full_root
        update_canonical_links_for_version(path; canonical = canonical_full_root)
    end
    return nothing
end

"""
    extract_versions_list(versions_js::AbstractString)

Read the `versions.js` file at `versions_js` and return the entries of its `DOC_VERSIONS`
array, in file order, with surrounding whitespace and double quotes removed and empty
entries dropped.

Throws an `ArgumentError` if the file does not exist, if no `var DOC_VERSIONS = [...]`
assignment matching the expected pattern is found, or if the list has no non-empty entries.
"""
function extract_versions_list(versions_js::AbstractString)
    versions_js = abspath(versions_js)
    if !isfile(versions_js)
        throw(ArgumentError("No such file: $(versions_js)"))
    end
    versions_js_content = read(versions_js, String)

    # Capture everything between the square brackets of the DOC_VERSIONS assignment.
    doc_versions = match(
        r"var\s+DOC_VERSIONS\s*=\s*\[([0-9A-Za-z\"\s.,+-]+)\]",
        versions_js_content,
    )
    if doc_versions === nothing
        throw(ArgumentError("""
        Could not find DOC_VERSIONS in $(versions_js):
        $(versions_js_content)"""))
    end

    # Each comma-separated item still carries its quotes and surrounding whitespace.
    entries = map(split(doc_versions[1], ",")) do raw
        strip(c -> isspace(c) || (c == '"'), raw)
    end
    filter!(!isempty, entries)
    if isempty(entries)
        throw(ArgumentError("""
        DOC_VERSIONS empty in $(versions_js):
        $(versions_js_content)"""))
    end
    return entries
end
41 changes: 41 additions & 0 deletions src/walkdocs.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
    walkdocs(f, dir::AbstractString; collect::Bool=false)

Recursively walk the directory `dir`, which is assumed to contain Documenter-generated
HTML documentation, and call `f` once for every file with the `.html` extension.

`f` receives a single named tuple with the following fields (all strings):

- `root`: the root directory of the walk, i.e. `dir` (but as an absolute path)
- `filename`: name of the file, without any directory components
- `relpath`: path to the file, relative to `dir`
- `fullpath`: absolute path to the file

By default the return values of `f` are discarded and `walkdocs` returns `nothing`. With
`collect = true`, the return values of `f` are gathered into a vector, in the order the
files were visited, and that vector is returned -- making `walkdocs` behave like a `map`
over the HTML files.

An error is thrown if `dir` is not a directory.
"""
function walkdocs(f, dir::AbstractString; collect::Bool=false)
    dir = abspath(dir)
    if !isdir(dir)
        error("docwalker: dir is not a directory\n dir = $(dir)")
    end

    results = collect ? Any[] : nothing
    for (current_dir, _, names) in walkdir(dir), name in names
        # Only HTML files are of interest; everything else is silently skipped.
        last(splitext(name)) == ".html" || continue
        fullpath = joinpath(current_dir, name)
        value = f((;
            root = dir,
            filename = name,
            relpath = Base.relpath(fullpath, dir),
            fullpath = fullpath,
        ))
        collect && push!(results, value)
    end
    return results
end
13 changes: 7 additions & 6 deletions test/outdated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ mktempdir() do TMP
cp(joinpath(TMP, "fixtures", "pre"), transient_path, force=true)
OutdatedWarning.generate(transient_path)

for (root, _, files) in walkdir(transient_path)
for file in files
content = read(joinpath(root, file), String)
expected = read(joinpath(replace(root, "transient" => "post"), file), String)
@test replace(content, "\r\n" => "\n") == replace(expected, "\r\n" => "\n")
end
DocumenterTools.walkdocs(transient_path) do fileinfo
content = read(fileinfo.fullpath, String)
expected = read(
joinpath(replace(dirname(fileinfo.fullpath), "transient" => "post"), fileinfo.filename),
String
)
@test replace(content, "\r\n" => "\n") == replace(expected, "\r\n" => "\n")
end

rm(joinpath(TMP, "fixtures"), recursive=true)
Expand Down
4 changes: 4 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ import Documenter
DocumenterTools.genkeys(DocumenterMarkdown)
end

@testset "walkdocs" begin
include("walkdocs.jl")
end

@testset "outdated warnings" begin
include("outdated.jl")
end
Expand Down
19 changes: 19 additions & 0 deletions test/walkdocs.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Default mode: the callback runs once per HTML file under test/fixtures and the return
# value of walkdocs itself is `nothing`.
let visited = []
    result = DocumenterTools.walkdocs(joinpath(@__DIR__, "fixtures")) do info
        push!(visited, info)

        # Path invariants that must hold for every visited file.
        @test isabspath(info.root)
        @test isabspath(info.fullpath)
        @test !isabspath(info.relpath)
        @test joinpath(info.root, info.relpath) == info.fullpath
    end
    @test result === nothing
    @test length(visited) == 10
end

# collect = true: the callback's return values come back as a collection, one per file.
let roots = DocumenterTools.walkdocs(
        info -> info.root, joinpath(@__DIR__, "fixtures"); collect=true
    )
    @test length(roots) == 10
    @test all(root -> root isa String, roots)
end
Loading