From 892d223a9d6f93088cef8ad93267e354bac7525e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Denis=20Baru=C4=8Di=C4=87?= Date: Fri, 20 Sep 2024 13:13:25 +0200 Subject: [PATCH] Fix import-before-module check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #51635 introduced function `check_src_module_wrap` which checks whether a module declaration is missing or whether a `using`/`import` is declared before a module. The original implementation, based on manual checks via regexes, did not catch all possible cases. In particular, it did not consider multi-line comments, e.g., ``` using Foo =# module Bar end ``` threw errors even though it should be valid. Single-line comments, multi-line comments, and docstrings make it quite difficult to reliably check that a module is declared and not preceded by any using/import. Some examples are ``` module B end =# using A =# module B end using A =# module B end "docs" module B end ``` Because of that, this commit avoids all manually-written, specialized conditions and employs `Meta.parse` to parse the code into `Expr`s. It then becomes very simple to check the condition of interest. The downside is increased runtime. [WITHOUT the shortcut; see bellow] I measured the time it takes to check three files of different sizes (the first measurements is the current version, the second is master): ``` # 63979 lines of code @btime Base.check_src_module_wrap(Base.PkgId("dummy"), "LibSCIP.jl") 102.267 ms (535826 allocations: 36.19 MiB) 5.630 μs (8 allocations: 528 bytes) # 1051 lines of code @btime Base.check_src_module_wrap(Base.PkgId("dummy"), "Primes.jl") 3.525 ms (21503 allocations: 1.08 MiB) 5.946 μs (9 allocations: 656 bytes) # 79 lines of code @btime Base.check_src_module_wrap(Base.PkgId("dummy"), "BenchmarkTools.jl") 106.170 μs (578 allocations: 46.02 KiB) 3.240 μs (8 allocations: 560 bytes) ``` The slowdown is *dramatic* (most of the time is spent reading the source file into a String). I added a condition that checks if the file content starts with "module ", in which case the function returns immediatelly. This shortcut helps: LibSCIP.jl takes only 5.865 μs (instead of 102.265 ms). Of course, the shortcut is taken only in some cases... I am submitting this PR anyway for discussion. Fixes #55792 --- base/loading.jl | 48 ++++++++++++++----------- test/loading.jl | 94 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 99 insertions(+), 43 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 8d180845f942f..299c2fef7958b 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2792,33 +2792,39 @@ function load_path_setup_code(load_path::Bool=true) return code end +# Return true if module declaration is found +# Throw an error if using/import is found +# Otherwise return false +function _check_src_module_wrap(pkg::PkgId, srcpath::String, ex::Expr) + isexpr(ex, [:using, :import]) && throw(ErrorException("Package $(repr("text/plain", pkg)): file $srcpath has a using/import before a module declaration.")) + isexpr(ex, :module) && return true + for arg in ex.args + if isa(arg, Expr) + _check_src_module_wrap(pkg, srcpath, arg) && return true + end + end + return false +end + """ check_src_module_wrap(srcpath::String) Checks that a package entry file `srcpath` has a module declaration, and that it is before any using/import statements. """ function check_src_module_wrap(pkg::PkgId, srcpath::String) - module_rgx = r"^(|end |\"\"\" )\s*(?:@)*(?:bare)?module\s" - load_rgx = r"\b(?:using|import)\s" - load_seen = false - inside_string = false - for s in eachline(srcpath) - if count("\"\"\"", s) == 1 - # ignore module docstrings - inside_string = !inside_string - end - inside_string && continue - if contains(s, module_rgx) - if load_seen - throw(ErrorException("Package $(repr("text/plain", pkg)) source file $srcpath has a using/import before a module declaration.")) - end - return true - end - if startswith(s, load_rgx) - load_seen = true - end - end - throw(ErrorException("Package $(repr("text/plain", pkg)) source file $srcpath does not contain a module declaration.")) + # Fast path: if the source file starts with a module declaration, just + # return true + open(startswith("module "), srcpath) && return true + + source = read(srcpath, String) + ex, pos = Meta.parse(source, 1) + while ex !== nothing + # Dosctrings are parsed as an expression with the module definition in + # the expression's argument, so we need recursive check + _check_src_module_wrap(pkg, srcpath, ex) && return true + ex, pos = Meta.parse(source, pos) + end + throw(ErrorException("Package $(repr("text/plain", pkg)): file $srcpath does not contain a module declaration.")) end # this is called in the external process that generates precompiled package files diff --git a/test/loading.jl b/test/loading.jl index 8db8405ef2a83..ee896c68bb184 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -1271,22 +1271,26 @@ end @testset "checking srcpath modules" begin p = Base.PkgId("Dummy") fpath, _ = mktemp() + + function check(src) + write(fpath, src) + return Base.check_src_module_wrap(p, fpath) + end + @testset "valid" begin - write(fpath, """ + @test check(""" module Foo using Bar end """) - @test Base.check_src_module_wrap(p, fpath) - write(fpath, """ + @test check(""" baremodule Foo using Bar end """) - @test Base.check_src_module_wrap(p, fpath) - write(fpath, """ + @test check(""" \"\"\" Foo using Foo @@ -1295,66 +1299,112 @@ end using Bar end """) - @test Base.check_src_module_wrap(p, fpath) - write(fpath, """ + @test check(""" \"\"\" Foo \"\"\" module Foo using Bar end """) - @test Base.check_src_module_wrap(p, fpath) - write(fpath, """ + @test check(""" \"\"\" Foo \"\"\" module Foo using Bar end """) - @test Base.check_src_module_wrap(p, fpath) - write(fpath, """ + @test check(""" @doc let x = 1 x end module Foo using Bar end """) - @test Base.check_src_module_wrap(p, fpath) - write(fpath, """ + @test check(""" # using foo module Foo using Bar end """) - @test Base.check_src_module_wrap(p, fpath) + + @test check(""" + #= + using foo + =# + module Foo + using Bar + end + """) + + @test check(""" + #= + nested multiline comment + #= + using foo + =# + =# + module Foo + using Bar + end + """) + + @test check(""" + #= using foo =# + module Foo + using Bar + end + """) + + @test check(""" + #= \"\"\" =# + module Foo + #= \"\"\" =# + using Bar + end + """) end @testset "invalid" begin - write(fpath, """ + @test_throws ErrorException check(""" # module Foo using Bar # end """) - @test_throws ErrorException Base.check_src_module_wrap(p, fpath) - write(fpath, """ + @test_throws ErrorException check(""" using Bar module Foo end """) - @test_throws ErrorException Base.check_src_module_wrap(p, fpath) - write(fpath, """ + @test_throws ErrorException check(""" using Bar """) - @test_throws ErrorException Base.check_src_module_wrap(p, fpath) - write(fpath, """ + @test_throws ErrorException check(""" x = 1 """) - @test_throws ErrorException Base.check_src_module_wrap(p, fpath) + + @test_throws ErrorException check(""" + using Bar #= + =# + module Foo + end + """) + + @test_throws ErrorException check(""" + #= + module Foo + =# + using Bar + module Foo2 + end + #= + end + =# + """) end end