diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f43a688e9..29c538079d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 **For upgrading:** You should double check and fix all the offending links. Alternatively, you can also set `strict = Documenter.except(:cross_references)`, so that the errors would be reduced to warnings (however, this is not recommended, as you will have broken links in your generated documentation). +* The HTML output now enforces size thresholds for the generated HTML files, to catch cases where Documenter is deploying extremely large HTML files (usually due to generated content, like figures). + If any generated HTML file is above either of the thresholds, Documenter will either error and fail the build (if above `size_threshold`), or warn (if above `size_threshold_warn`). ([#2142], [#2205]) + + **For upgrading:** If your builds are now failing due to the size threshold checks, you should first investigate why the generated HTML files are so large (e.g. you are likely automatically generating too much HTML, like extremely large inline SVG figures), and try to reduce them below the default thresholds. + If you are unable to reduce the generated file size, you can increase the `size_threshold` value to just above the maximum size, or disable the enforcement of size threshold checks altogether by setting `size_threshold = nothing`. + + ### Added * Doctest filters can now be specified as regex/substitution pairs, i.e. `r"..." => s"..."`, in order to control the replacement (which defaults to the empty string, `""`). ([#1989], [#1271]) diff --git a/src/html/HTMLWriter.jl b/src/html/HTMLWriter.jl index 6e8a77ae11..22e9d4b07b 100644 --- a/src/html/HTMLWriter.jl +++ b/src/html/HTMLWriter.jl @@ -365,6 +365,21 @@ value is `"en"`. **`warn_outdated`** inserts a warning if the current page is not the newest version of the documentation. 
+**`size_threshold`** sets the maximum allowed HTML file size (in bytes) that Documenter is allowed to +generate for a page. If the generated HTML file is larger than this, Documenter will throw an error and +the build will fail. If set to `nothing`, the file sizes are not checked. Defaults to `200 KiB` (but +increases of this default value will be considered to be non-breaking). + +**`size_threshold_warn`**: like `size_threshold`, but going over this limit will only cause Documenter to +print a warning, instead of throwing an error. Defaults to `100 KiB`, and must be less than or equal to +`size_threshold`. + +!!! note "Purpose of HTML size thresholds" + + The size threshold, with a reasonable default, exists so that users would not deploy huge pages + accidentally (which, among other things, results in bad UX for the readers and negatively impacts + SEO). It is relatively easy to have e.g. an `@example` produce a lot of output. + ## Experimental options **`prerender`** a boolean (`true` or `false` (default)) for enabling prerendering/build @@ -432,6 +447,8 @@ struct HTML <: Documenter.Writer prerender :: Bool node :: Union{Cmd,String,Nothing} highlightjs :: Union{String,Nothing} + size_threshold :: Int + size_threshold_warn :: Int function HTML(; prettyurls :: Bool = true, @@ -450,11 +467,11 @@ struct HTML <: Documenter.Writer ansicolor :: Bool = true, lang :: String = "en", warn_outdated :: Bool = true, - - # experimental keywords prerender :: Bool = false, node :: Union{Cmd,String,Nothing} = nothing, highlightjs :: Union{String,Nothing} = nothing, + size_threshold :: Union{Integer, Nothing} = 200 * 2^10, + size_threshold_warn :: Union{Integer, Nothing} = 100 * 2^10, # deprecated keywords edit_branch :: Union{String, Nothing, Default} = Default(nothing), @@ -487,10 +504,25 @@ struct HTML <: Documenter.Writer end footer = isnothing(footer) ? 
nothing : convert(Node, footer) end + # convert size threshold values to integers, if need be + if isnothing(size_threshold) + size_threshold = typemax(Int) + elseif size_threshold <= 0 + throw(ArgumentError("size_threshold must be non-negative, got $(size_threshold)")) + end + if isnothing(size_threshold_warn) + size_threshold_warn = min(typemax(Int), size_threshold) + elseif size_threshold_warn <= 0 + throw(ArgumentError("size_threshold_warn must be non-negative, got $(size_threshold_warn)")) + elseif size_threshold_warn > size_threshold + throw(ArgumentError("size_threshold_warn ($size_threshold_warn) must be smaller than size_threshold ($size_threshold)")) + end isa(edit_link, Default) && (edit_link = edit_link[]) new(prettyurls, disable_git, edit_link, repolink, canonical, assets, analytics, collapselevel, sidebar_sitename, highlights, mathengine, description, footer, - ansicolor, lang, warn_outdated, prerender, node, highlightjs) + ansicolor, lang, warn_outdated, prerender, node, highlightjs, + size_threshold, size_threshold_warn, + ) end end @@ -696,12 +728,14 @@ function render(doc::Documenter.Document, settings::HTML=HTML()) copy_asset("themes/$(theme).css", doc) end - for page in keys(doc.blueprint.pages) + size_limit_successes = map(collect(keys(doc.blueprint.pages))) do page idx = findfirst(nn -> nn.page == page, doc.internal.navlist) nn = (idx === nothing) ? 
Documenter.NavNode(page, nothing, nothing) : doc.internal.navlist[idx] @debug "Rendering $(page) [$(repr(idx))]" render_page(ctx, nn) end + # Check that all HTML files are smaller or equal to size_threshold option + all(size_limit_successes) || throw(HTMLSizeThresholdError()) render_search(ctx) @@ -714,6 +748,13 @@ function render(doc::Documenter.Document, settings::HTML=HTML()) generate_siteinfo_json(doc.user.build) end +struct HTMLSizeThresholdError <: Exception end +function Base.showerror(io::IO, ::HTMLSizeThresholdError) + print(io, """ + HTMLSizeThresholdError: Some generated HTML files are above size_threshold. + See logged errors for details.""") +end + """ Copies an asset from Documenters `assets/html/` directory to `doc.user.build`. Returns the path of the copied asset relative to `.build`. @@ -759,9 +800,7 @@ function render_page(ctx, navnode) article = render_article(ctx, navnode) footer = render_footer(ctx, navnode) htmldoc = render_html(ctx, navnode, head, sidebar, navbar, article, footer) - open_output(ctx, navnode) do io - print(io, htmldoc) - end + write_html(ctx, navnode, htmldoc) end ## Search page @@ -783,9 +822,7 @@ function render_search(ctx) script[:src => relhref(src, ctx.search_js)], ] htmldoc = render_html(ctx, ctx.search_navnode, head, sidebar, navbar, article, footer, scripts) - open_output(ctx, ctx.search_navnode) do io - print(io, htmldoc) - end + write_html(ctx, ctx.search_navnode, htmldoc) end ## Rendering HTML elements @@ -1705,13 +1742,30 @@ end # ------------------------------------------------------------------------------ """ -Opens the output file of the `navnode` in write node. If necessary, the path to the output -file is created before opening the file. +Writes the HTML DOM into the HTML file that corresponds to `navnode`. 
+Prints a warning/error if the page goes over the `size_threshold` or `size_threshold_warn` +limits, and in the former case also returns `false`, to report back to the caller that the +size threshold check failed. """ -function open_output(f, ctx, navnode) - path = joinpath(ctx.doc.user.build, get_url(ctx, navnode)) +function write_html(ctx::HTMLContext, navnode::Documenter.NavNode, page_html::DOM.HTMLDocument) :: Bool + page_path = get_url(ctx, navnode) + buf = IOBuffer() + print(buf, page_html) + path = joinpath(ctx.doc.user.build, page_path) isdir(dirname(path)) || mkpath(dirname(path)) - open(f, path, "w") + file_size = open(io -> write(io, take!(buf)), path; write=true) + size_threshold_msg(var::Symbol) = """ + Generated HTML over $(var) limit: $(page_path) + Generated file size: $(file_size) (bytes) + size_threshold_warn: $(ctx.settings.size_threshold_warn) (bytes) + size_threshold: $(ctx.settings.size_threshold) (bytes)""" + if file_size > ctx.settings.size_threshold + @error size_threshold_msg(:size_threshold) + return false + elseif file_size > ctx.settings.size_threshold_warn + @warn size_threshold_msg(:size_threshold_warn) + end + return true end """ diff --git a/test/examples/make.jl b/test/examples/make.jl index 75738e2a0b..b2e4024462 100644 --- a/test/examples/make.jl +++ b/test/examples/make.jl @@ -17,9 +17,9 @@ EXAMPLE_BUILDS = if haskey(ENV, "DOCUMENTER_TEST_EXAMPLES") split(ENV["DOCUMENTER_TEST_EXAMPLES"]) else ["html", "html-meta-custom", "html-mathjax2-custom", "html-mathjax3", "html-mathjax3-custom", - "html-local", "html-draft", "html-repo-git", - "html-repo-nothing", "html-repo-error", "latex_texonly", "latex_simple_texonly", - "latex_showcase_texonly", "html-pagesonly"] + "html-local", "html-draft", "html-repo-git", "html-repo-nothing", "html-repo-error", + "html-sizethreshold-defaults-fail", "html-sizethreshold-success", "html-sizethreshold-ignore-success", "html-sizethreshold-override-fail", + "latex_texonly", "latex_simple_texonly", 
"latex_showcase_texonly", "html-pagesonly"] end # Modules `Mod` and `AutoDocs` @@ -476,6 +476,63 @@ end end end +# size thresholds +@examplebuild "sizethreshold-defaults-fail" begin + @quietly try + makedocs(; + sitename = "Megabyte", + root = examples_root, + build = "builds/sizethreshold-defaults-fail", + source = "src.megapage", + debug = true, + ) + catch e + e + end +end +@examplebuild "sizethreshold-success" begin + @quietly try + makedocs(; + sitename = "Megabyte", + root = examples_root, + build = "builds/sizethreshold-success", + source = "src.megapage", + format = Documenter.HTML(size_threshold = 5 * 2^20), + debug = true, + ) + catch e + e + end +end +@examplebuild "sizethreshold-ignore-success" begin + @quietly try + makedocs(; + sitename = "Megabyte", + root = examples_root, + build = "builds/sizethreshold-ignore-success", + source = "src.megapage", + format = Documenter.HTML(size_threshold = nothing), + debug = true, + ) + catch e + e + end +end +@examplebuild "sizethreshold-override-fail" begin + @quietly try + makedocs(; + sitename = "Megabyte", + root = examples_root, + build = "builds/sizethreshold-override-fail", + source = "src.megapage", + format = Documenter.HTML(size_threshold = 100, size_threshold_warn = nothing), + debug = true, + ) + catch e + e + end +end + # PDF/LaTeX examples_latex_simple_doc = if "latex_simple" in EXAMPLE_BUILDS @info("Building mock package docs: LaTeXWriter/simple") diff --git a/test/examples/src.megapage/index.md b/test/examples/src.megapage/index.md new file mode 100644 index 0000000000..a9a63d3d4c --- /dev/null +++ b/test/examples/src.megapage/index.md @@ -0,0 +1,20 @@ +# Megabytepage + +This page has more than 1MB of HTML. 
+ +```@example +using Random +for s in Base.Iterators.partition(randstring(2^20), 80) + # Note: the join() is necessary to get strings (as opposed to Vector{Char} objects) + # on older Julia versions, since there was a breaking-ish bugfix that changed how + # Iterators.partition works with strings. join(::SubString) appears to basically be + # a no-op, so it has no real effect on newer Julia versions. + # + # https://github.com/JuliaLang/julia/issues/45768 + # https://github.com/JuliaLang/julia/pull/46234 + # + # Note: we _could_ also just print the vectors, but then the HTML files end up being + # ~14 MiB. + println(join(s)) +end +``` diff --git a/test/examples/tests.jl b/test/examples/tests.jl index b7cb59a4e9..32fc9c95c0 100644 --- a/test/examples/tests.jl +++ b/test/examples/tests.jl @@ -245,6 +245,13 @@ end @test examples_html_repo_error_doc.user.remote === nothing end + @testset "HTML: sizethreshold" begin + @test examples_html_sizethreshold_defaults_fail_doc isa Documenter.HTMLWriter.HTMLSizeThresholdError + @test examples_html_sizethreshold_success_doc isa Documenter.Document + @test examples_html_sizethreshold_ignore_success_doc isa Documenter.Document + @test examples_html_sizethreshold_override_fail_doc isa Documenter.HTMLWriter.HTMLSizeThresholdError + end + @testset "PDF/LaTeX: TeX only" begin doc = Main.examples_latex_texonly_doc @test isa(doc, Documenter.Documenter.Document) diff --git a/test/htmlwriter.jl b/test/htmlwriter.jl index 696e2b2e5e..954d959dc4 100644 --- a/test/htmlwriter.jl +++ b/test/htmlwriter.jl @@ -260,5 +260,40 @@ end generate_redirect_file(redirectfile, entries) @test !isfile(redirectfile) end + + @testset "HTML: size_threshold" begin + @test_throws ArgumentError Documenter.HTML(size_threshold = 0) + @test_throws ArgumentError Documenter.HTML(size_threshold = -100) + @test_throws ArgumentError Documenter.HTML(size_threshold_warn = 0) + @test_throws ArgumentError Documenter.HTML(size_threshold_warn = -100) + @test_throws 
ArgumentError Documenter.HTML(size_threshold = -100, size_threshold_warn = -100) + @test_throws ArgumentError Documenter.HTML(size_threshold = 1, size_threshold_warn = 2) + # Less than size_threshold_warn: + @test_throws ArgumentError Documenter.HTML(size_threshold = 1) + + html = Documenter.HTML() + @test html.size_threshold == 200 * 2^10 + @test html.size_threshold_warn == 100 * 2^10 + + html = Documenter.HTML(size_threshold = nothing) + @test html.size_threshold == typemax(Int) + @test html.size_threshold_warn == 100 * 2^10 + + html = Documenter.HTML(size_threshold = nothing, size_threshold_warn = 1234) + @test html.size_threshold == typemax(Int) + @test html.size_threshold_warn == 1234 + + html = Documenter.HTML(size_threshold_warn = nothing) + @test html.size_threshold == 200 * 2^10 + @test html.size_threshold_warn == 200 * 2^10 + + html = Documenter.HTML(size_threshold = 1234, size_threshold_warn = nothing) + @test html.size_threshold == 1234 + @test html.size_threshold_warn == 1234 + + html = Documenter.HTML(size_threshold = 12345, size_threshold_warn = 1234) + @test html.size_threshold == 12345 + @test html.size_threshold_warn == 1234 + end end end