From 6d5787a993ae7623df479217c9805767ceaa3556 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Mon, 13 Nov 2023 11:32:10 -0500 Subject: [PATCH] Devdocs on fixing precompile hangs, take 2 (#51895) This is #50914, with only the documentation changes, plus an improvement to the warning message. --------- Co-authored-by: Tim Holy Co-authored-by: Ian Butterworth --- doc/make.jl | 1 + doc/src/devdocs/img/precompilation_hang.png | Bin 0 -> 9175 bytes doc/src/devdocs/precompile_hang.md | 98 ++++++++++++++++++++ src/jl_uv.c | 9 +- 4 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 doc/src/devdocs/img/precompilation_hang.png create mode 100644 doc/src/devdocs/precompile_hang.md diff --git a/doc/make.jl b/doc/make.jl index 78604a5fd3c75..d0bd52247aa81 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -163,6 +163,7 @@ DevDocs = [ "devdocs/gc.md", "devdocs/jit.md", "devdocs/builtins.md", + "devdocs/precompile_hang.md", ], "Developing/debugging Julia's C code" => [ "devdocs/backtraces.md", diff --git a/doc/src/devdocs/img/precompilation_hang.png b/doc/src/devdocs/img/precompilation_hang.png new file mode 100644 index 0000000000000000000000000000000000000000..d076b7697f2712a12aa8132197e95ea7e7014e2d GIT binary patch literal 9175 zcmb_iXH-+$wzhy&DIy?EihvLT9GdhX9SJogKxm2(nu0>8N2bX*+AJIoCJ8Z_c@Pl!=k*i>R})WnWKRjar+KW0M4vl%#hd&%|J{k#|J=D7 z0e7_2;r@23S+?bZV?N#LmFl?W(35dAe|odrz=8Zu=3-XUN;0CD`~@~W#K?B1 z3-lRyDb>+)yF}v95$}5T8>d{eR1A?MOTJ1)7VSnJJOf@VD{vq=&fX%NZ;CpZZB^@x?RWg)_bce6J@|O{bUl5=dL~Tm zn4h&Y;mCPvaZiz?ir%pyQYn1%KfUO~@a>%s*XiupVljDu8E&xjs-x+EwjX%;_~iFl zI(SjlrqIkuPsq`>Zm9?)8S@}_LB#gk{+=13srvS24rL3BgnJygy8WQ+5{RYW81l9p zL1zez=VZKew!C@G_)O%KZ@`@b(hEGD5@2F#m0f;W*M}P`asK(jL+CDuvJXCB=9ux* z`<>4}yS%nWSF6d6bc!e2SO0s*_$|pMeZlTnGkUKpA~DX?Ro;k_(mq1RB*oZyqD}`Hv-KHp zb{+NsuNxvOn~x=|BKoWKjteJ9&VAXfx*8xoh-d>=^eV^UgQ~lq#T{fvmDeWqJajwz zC*S#4t49(het>T_=)hf2=w+7Osp-g&aNr&yzl{*l>HMhxK!}$~pMflZhD)6U5u!A=J{dbLCaj zcX!W#);9!9EjKBZnrl$zp@Zk3cZdtc4b$Dhm*XAV=lTahhJ*RiD0OXwU9JR2$5sBl zX=t2)*WMyXILFkS`r|q>I+}#*)BI$opn%inY~jA6JY1Mqq3tq?n9qt=6OXmzGZzg*4BJ^IJnM26g)c774^=I(HDdYM zCMG`MNZ4xeKd16UcpN!dmOC3vSxJ^B$G`W`Jjm$QRB)$K$RSuq)UGAETkqwc)E3+rf-TR zeYSGFT_+aAGNAJYPojU14{Qq}7+9$zSUeeR>*{P9=@3qOJyQ>K&PKUG9z_u`4?u*o ziAJ3x{fclB-MoeA z47BW5GILKQRj4@=vd=@wG~)Dw90;R`bbBrJ<=9M9JOLUHgn<4|n~5?lmPvrMQSn_$ z*CtWEQ#bJ!&79x-N~7|w9ig(^fFfmhXaywm{e&i_?%q=ERaQKq;%!cF4Q~FW+aUnS zKlRpPA#PE%N-6z=MrWW48U%L!Ri$))cCbN133H%h4fazqcU#hMw!kxwfEn(p2*zgV zJLjj`<>O{srG4w~t6SZ=z|d`#Oh*;b6mr<*>^B$;jwovtP!*I5S7|w4nBo|vKrJ^H z`k_njJpYH^CEBR)@B(34)*o3M5U8nXI-e^wh`|NE->Wa~&OVsT98Ici^qK)Vc|-sD znL!Q%2pNmwdD3^Af)vLkudDI*Mx1Ce=kF9AJ*$|tfyj25dzvZ#o@SCQ3cUVgmi~(Q z%Nt_j6b^4LcU!UG653ccsY6(m7zL9O)UU*y|4Ka$x2vOLm7{Rp*Q;qSOE7l*csb|G zt!1-=x&jTcmov{2@VBoPkpRTrMs%Bi+YVLnC;VapXLNG+AG(x)=})Ay=>Bkvx)1d| zBR&=~2(5CZd7sSo%$28lcdb_ha~~BpK7PFS_2-zk>*GD6hO@w;AsZ80-t&kD614sO zbMjDXb)WTw6VLP|zpkt(3pX+iIT%F)I^k-AT7D_HUEdq?hpYf}I={8Kk4Zst=OlMZ98EY`?_vM0vWLiB!uej@-mQ`ij;2F?&DbUj*gz zv|Q*4W}}9Rnzd}a9;+g7mKvzcGz^N%0(PgV76FEq5qdTwy2%i=s+IlDXrmG&fYj$i z2oKLZOny_DzEW#ukuLxJ^x3xuYVWhb@{cANV#t2oi3I#gK%iQR*ctxFmxIn=jNX#I z@%Uy#W7>Dy-(x5{h%2#)v7Gf#PF}@CIG_!JNT#^(&3306=qoU%M@9SQsRd zs40DkZbl1J>N^~fZQwD;##bDK%AT7Y{iJ~pI9~OeIoSpn5CaHe*Hq6qdQVU5QUyqU zI{u1!IJhE>Y>00<86Y(Hs5go9L!kR2@T&q*#^yr!J?yK2$D91}M`p9#7Lr|k;mj2J zs#YC??+<7B=-jZ;HMg1AW;xpLj1V{-wK0})3c1DFhA?+%T>yDS1|t!dTO&M5`lU7r zCWA)+i8rNvc7JeNE<8@lECsURc$3cSx!?DfS?1?^aB%h|;hXc@w4=))p zD`2vIUIEDzI=bS%sZa2STVfUyWr&NLc^t@NeTFR-@= z*mEYa&*qBdf=jdAs7I3H;XGPMoWx~|l%`i>GBAf=wUKL9*mz7%4~af6NGtu~!rP z6z0e+&8oY7#{y<%ASW{`_dB*1k1z`?B%Ql1M~Q% zM7k~oM^K;3pr{jBtq7$COD(4QHP`$`rusT_h)N)#2}E&a+BDY75c!{>1y*-SVFzEF z3wJcg&H40xb6)B^pih$^8P=@(#Us%*iD4j?5q|()WYx;0lS(PWz0#+0fv59JMO1Nc z{M%J2{u1b`U0I@d!poU$1<(WdEam3-xbplG>oySVfJ#MrefiCUo^>FOMLk<=ACkZ$ zgZq+*b+3fTNp9^L+3uH!beQR>v|NhyJs*5HPKFC>VMKTaqfo!~S6hNgA6iSZ z0#dMS^++_dXcC%JR9b3)nmj&=DU#)#hMi9)1unejx(4J$ON#|53O{XqAfLm}tkoqG z2`fU_jnSPVNxACVAzSw5q2C4UraH45OJ?%T49Wd-p}Y0T<*|k0??k{sLoC^7EzDhnRkSKb+FNP1(_K zHRM+3wZ=_n{=l}d=bE9oJjniv#7i;x%KP`>9{;IMuS%t7^6%Y&LC^P5-_U~jGt7M* zpqe!QE_`X32i@Hujbw8hj=I2jvc`_~OY&f)90tuVr_F{9_pUfk2o)04?;b#ORq&Kwpb)pFD!$0M0%DpyjV7^5X(=Ve=RxpoFWtjPG)`y#8*zuFlC-mX1(vu(Jcm zj@?NNke)gjC)z+e8s9vAT)HQD7lUA%q^-K!o)GCcQSBmTq}nx2CBKFyBig;(U~f4_ zZFsI{$YpsLQGC)p%d79ZxhZlF7-a1)e}*mv_QjS$N*{Udnc+DD+j?$Nr}sE)K~w{; zgKj48P}Vb2;CjpFYwG0-Rm~-crM^cey)T2;Pnun~r&=BMsi$p@_6-$lS>0auP#!fO zkM_5OASf9lMo0p?4=hfIJz-QIo-pK4i&)x@mLxWj1XID>6x|!fWq{@$Yv^t|X416@ zeC&RZkSn7neW@uSU3QKstK^mix|W^n$D6s*gXn9CepcGcVGNo*cOvdgZE#qto-~~D z@p>g8LknRs+; zFm{!9Z&?uuY=&(4m0~6fZeGkj!d$Z~wDSHY#~N#xB|R_y3>?{%2NZ@Ny=D!qlTF^{ zXU47^L(`v)Vo3B~Mz(*H`T3J;0q@^N9gWm@g7v~KHB@cB6atyl6z9|m+o2B&4TZBffVOcedqvV1b z@G2Di_<8Kg9pU1qca>^~gL@VFAdu~AWN5EOkhD^SFy;Y=H^DK6=?0$$k*4ao-ilWw_n8xij%^w>8 z^x)kBGJ}CbgrOdx3V9>z9Cp~YP*rXXKb=Z*XfdgUBeCq|W5|Ca?^D#*su4O& zc#EB zU;u1uuM?~4lEO0O=EL^%$q_PnXT!~2869-kGg7&_+wYoDe!my80&Ky~ZIr-XcHjY# z8o|z{cT#)%OSEsHewj{nC~0{68E><<=y=E$2QqlO)W`?`X$R9R+F18Z%BcE`Sk}LPVZ(z4p;~^Ybsku z@_oF*Ilj|PPQ-p7gSlU#T?eIoGmTWI`Z@UgB*VdG{4wFv#5dEQi%)u@ek2VfxG%U6 zdx=KeRu5o5O^81~;NYu$*5tP0V zE*$ZYZMv!WWG?n|D|TgxIa*0m+Jm>e=W@;Fo`*B!kqcPxa5NwyJlq&z(3#+^rnPH4 z8t~PzSLu=SJKq#wUbNNW<4WJj1@uF<(UsPJGLopZ>uQ+aHvs^h5{hbnsN7E3Kyq>+ zOQo5KL$87Af2H~=#r??1XNWl+ptYyhMFQ2TOuKO45E2(Vy^-GKLRpG0inaMRIJJ&U z_4}W2n~1EyH7(>yN7mnkX~ns*nC)UMjXYvSjHFXca{N!Qp+ks%m#Zaq<^*w{(PbD6 zGJMJ1f&!w5E;-y2pP~FMm>qo)>=5-$L=2o8jyuo3Q)_N-Kk*Y&da%yH)=Qz1OqsLO z`0>K~lDS!PbxK3iCpzl*n%oP+ic0#pRoDcS?o+VaV`~$3{PPdsR(|Sg&5ZP6Us7}8 z$nrOc-SS9o<|7SX&X#8yJwl)qi7KLdkF(QLPJ)O_uR{wA-T_78F(vOA?T_ml_FS4p zI$gf}>B8Qb>|znnV^e0!YW|DY?d?}v;-Y}*&hyB6<5+^F{}J*_g!*{p)Em0|nr;`_ z5EM93XhwwDo}bJ!mk`2F8206!1alu#b$!vWMFjzv!^LdU@uY*8jD3lG@(R0-@@mr@ zCX>^W7`{;4y-u_tet#xa!qsc>cKRiJKol2FS!Xuxppc{AQ@zo%aG*>?^`|Fa8IPys zGp~g_1=h9_jVaLvpw>O8Z_>DkW2M9^r*X3S29$*ahEHfLUcP0@?Dq7xb7N}*Hnr06 zBBaO}ZjCCsJjbHDpAzKp@UR5}$B_0-jQ(<*GCOX)Fub5cZhw9vZE+-B)Oy;jfXcG& zxQ`sRz@@D~rMN?fj22|Sav>Z0#NsU!e^BV$>-($gf(O`z0=nP@d7c8LVii18>U>d@ z#wi&IhK#6zZ}+tBKr)L~#UWoMNuOFT8NH4{Db_WV_qiQEf0@$lV0&X(z@!0p%*Pf- zh*ZM)NMP7@@+=j8nW(Mx$g%zH#Nw+{rAHe3iFm9y96o;|UN!1ixnJe!%C1Pu z7jHhk>VSTlw)V0HjGz?~^L6>)rl0<~YBMjOHv65nZ zbX(Gag$Hn9x@-L@W0gmAaaCBf|i)tW3N05Q@@|amnHGS@v{^TrOby zIJ}>qMQG%7ufn=o4lUxeGWB`uvC2qMhQh5lOEj3AGXPyX+OFoXXwq<4k9UksN5ZqO z+<({ouLuVBLex=l(qHHsi{Xt0!o~A-r3>}_*#w2M>xi>f_Unr{7WsKUOSUU-M@0rH zMr4fs&|uZ;9S;Bf*fM1@6>iSS$x{fU)DKq`rCSDp^Hl~4iKUK86}O+Nbt{bc;t^}D z_F|t}U_0Sp>ol&9ZZ{MjQof%4L)9mD%zV4Xl2$H&ClN&UPK3VR4x1AJedvE#SHJWm z`p?j>X_xd)8wv*#rC4JrM-_#^?Za=L5tk3+O2;05uOGvNI~1DZF5`p@Uk*Ary|-N* zo+<4UUx@JyKjs_WIY=Xw1RW(Bzh|`Wu-$IE#&F;ASB3*wL)}eKki8<^#JgraL7g^k zk8rRH9kxEX3>9&-ZL6gQ^SlmS7oL;o2TF?Jil}rYPMtf@S6s z+4~^`a%X~6?`B$7f^|(bOTh1Po8%PMGb%ey`FpzPrSOAd+-cf3wDX3}Yjh03=INVv)S-S1)>u|Ui5VB_vF)(Zw$0i1oCQ4BeL*U25sr`SQyqv|*eALdk4C_rp zj}XdYbFmH`&_tOwAzRu*7gc;JYTZ$8i@KDgTObY+_nCoDSRa^di(<`hJ_)Is8 z!w;>HNYg#hb%a>&1DXk8W9fW!@|M!e@t+zS$Pieqxs=&m70)R(@bvSJ2yCa%kaH*Gj2-g*X&tMwQ7Lon79yy#X?f3KUk> z8K`qqcTYwCex9ND%~y}ZoetjdKo&Y8CT{X?vKW>MIsMggVMz^b`p80gTcHfy4e*4$ zTI7zD_@FscXHE*_A&1|H%!o5iuJi0XtAg}`s`>XK%MaBQfd3a7Z{{HPHhanEF5qd& zZJs-kP)Y+(vi^Oo%l&)V0|{wafy6RfIQzZ(9kukH5Qq6!XmPWTiWkLI)Q#F*$I)pA z6)8`NJ0WMz8R93!VqkB1sp8g|y<*~Q&u`zU%Ywk)UQ1jw#>0541`uYg6a=sdw>~}e zj5c;sAl&t~O2rjZI}17(h}c66IGp}z0Se5SC#_V3S9*%lQMPHL)=^h_S7GuS>tlDi zS0L;nT$@{8avs)TvLHx|zzu@_V2K=stt#O+IL{(w`0$K1U=3zD1cwV+m^AILn50P8 zGcFB(y8AbgeuWeZwQ|<~&I??g=`)X(sc^gBTSG1>ZdFQO^tOa~iJ6?8{J0pl@$#|E zSV!M}>43G_XRLZ95a+{sO9QizzM!c;#KfWQ!Hg!{atnIUR9zO8KcbK;ql-lCROezW z(N@_Rkp^I|uv-geB(n>)85Kxfd=_{}x)A;1s2p z?RYh}uv_*aV7Bm!FPc>f_@3~E)~&G%6!#Ka#jQG^GG%qoXvG1bX<9a~#va!KIJ0S| zi733sR$rbN=E-*brjJlGR`Ru^?mCELqWKy&nJ&n-UP$xzgeW^ODuyuRR z8Vi?C9IkPG*IErP@=|@|?}?`t`m(QBCB|3H@jLj07)GS}Q-~6J zJ(_sC4E){i7U2xIoU;%eKjjSk5s^YBoi76XN#$}qDG$4$~ z=)do(DNr+#O=(s_{tIgU|`W1xwKqdCZXMJ6I0uU!lIHm0sQQ&(7ybCybI)iD7Ass!2iq5 qCZ8Yq1ZQRb>u+AadB6Ace61eNQQn$&4Y)Js+#PKrtxAo1q5lO5s^B&N literal 0 HcmV?d00001 diff --git a/doc/src/devdocs/precompile_hang.md b/doc/src/devdocs/precompile_hang.md new file mode 100644 index 0000000000000..0de9c99792b64 --- /dev/null +++ b/doc/src/devdocs/precompile_hang.md @@ -0,0 +1,98 @@ +# Fixing precompilation hangs due to open tasks or IO + +On Julia 1.10 or higher, you might see the following message: + +![Screenshot of precompilation hang](./img/precompilation_hang.png) + +This may repeat. If it continues to repeat with no hints that it will +resolve itself, you may have a "precompilation hang" that requires +fixing. Even if it's transient, you might prefer to resolve it so that +users will not be bothered by this warning. This page walks you +through how to analyze and fix such issues. + +If you follow the advice and hit `Ctrl-C`, you might see + +``` +^C Interrupted: Exiting precompilation... + + 1 dependency had warnings during precompilation: +┌ Test1 [ac89d554-e2ba-40bc-bc5c-de68b658c982] +│ [pid 2745] waiting for IO to finish: +│ Handle type uv_handle_t->data +│ timer 0x55580decd1e0->0x7f94c3a4c340 +``` + +This message conveys two key pieces of information: + +- the hang is occurring during precompilation of `Test1`, a dependency of `Test2` (the package we were trying to load with `using Test2`) +- during precompilation of `Test1`, Julia created a `Timer` object (use `?Timer` if you're unfamiliar with Timers) which is still open; until that closes, the process is hung + +If this is enough of a hint for you to figure out how `timer = Timer(args...)` is being created, one good solution is to add `wait(timer)` if `timer` eventually finishes on its own, or `close(timer)` if you need to force-close it, before the final `end` of the module. + +However, there are cases that may not be that straightforward. Usually the best option is to start by determining whether the hang is due to code in Test1 or whether it is due to one of Test1's dependencies: + +- Option 1: `Pkg.add("Aqua")` and use [`Aqua.test_persistent_tasks`](https://juliatesting.github.io/Aqua.jl/dev/#Aqua.test_persistent_tasks-Tuple{Base.PkgId}). This should help you identify which package is causing the problem, after which the instructions [below](@ref pchang_fix) should be followed. If needed, you can create a `PkgId` as `Base.PkgId(UUID("..."), "Test1")`, where `...` comes from the `uuid` entry in `Test1/Project.toml`. +- Option 2: manually diagnose the source of the hang. + +To manually diagnose: + +1. `Pkg.develop("Test1")` +2. Comment out all the code `include`d or defined in `Test1`, *except* the `using/import` statements. +3. Try `using Test2` (or even `using Test1` assuming that hangs too) again + +Now we arrive at a fork in the road: either + +- the hang persists, indicating it is [due to one of your dependencies](@ref pchang_deps) +- the hang disappears, indicating that it is [due to something in your code](@ref pchang_fix). + +## [Diagnosing and fixing hangs due to a package dependency](@id pchang_deps) + +Use a binary search to identify the problematic dependency: start by commenting out half your dependencies, then when you isolate which half is responsible comment out half of that half, etc. (You don't have to remove them from the project, just comment out the `using`/`import` statements.) + +Once you've identified a suspect (here we'll call it `ThePackageYouThinkIsCausingTheProblem`), first try precompiling that package. If it also hangs during precompilation, continue chasing the problem backwards. + +However, most likely `ThePackageYouThinkIsCausingTheProblem` will precompile fine. This suggests it's in the function `ThePackageYouThinkIsCausingTheProblem.__init__`, which does not run during precompilation of `ThePackageYouThinkIsCausingTheProblem` but *does* in any package that loads `ThePackageYouThinkIsCausingTheProblem`. To test this theory, set up a minimal working example (MWE), something like + +```julia +(@v1.10) pkg> generate MWE + Generating project MWE: + MWE\Project.toml + MWE\src\MWE.jl +``` + +where the source code of `MWE.jl` is + +```julia +module MWE +using ThePackageYouThinkIsCausingTheProblem +end +``` + +and you've added `ThePackageYouThinkIsCausingTheProblem` to MWE's dependencies. + +If that MWE reproduces the hang, you've found your culprit: +`ThePackageYouThinkIsCausingTheProblem.__init__` must be creating the `Timer` object. If the timer object can be safely `close`d, that's a good option. Otherwise, the most common solution is to avoid creating the timer while *any* package is being precompiled: add + +```julia +ccall(:jl_generating_output, Cint, ()) == 1 && return nothing +``` + +as the first line of `ThePackageYouThinkIsCausingTheProblem.__init__`, and it will avoid doing any initialization in any Julia process whose purpose is to precompile packages. + +## [Fixing package code to avoid hangs](@id pchang_fix) + +Search your package for suggestive words (here like "Timer") and see if you can identify where the problem is being created. Note that a method *definition* like + +```julia +maketimer() = Timer(timer -> println("hi"), 0; interval=1) +``` + +is not problematic in and of itself: it can cause this problem only if `maketimer` gets called while the module is being defined. This might be happening from a top-level statement such as + +```julia +const GLOBAL_TIMER = maketimer() +``` + +or it might conceivably occur in a [precompile workload](https://github.com/JuliaLang/PrecompileTools.jl). + +If you struggle to identify the causative lines, then consider doing a binary search: comment out sections of your package (or `include` lines to omit entire files) until you've reduced the problem in scope. diff --git a/src/jl_uv.c b/src/jl_uv.c index 281dd798dbb36..62dc3a628d085 100644 --- a/src/jl_uv.c +++ b/src/jl_uv.c @@ -51,9 +51,9 @@ static void walk_print_cb(uv_handle_t *h, void *arg) npad += strlen(type); pad += npad < strlen(pad) ? npad : strlen(pad); if (fd == -1) - jl_safe_printf(" %s %s@%p->%p\n", type, pad, (void*)h, (void*)h->data); + jl_safe_printf(" %s %s%p->%p\n", type, pad, (void*)h, (void*)h->data); else - jl_safe_printf(" %s[%zd] %s@%p->%p\n", type, (size_t)fd, pad, (void*)h, (void*)h->data); + jl_safe_printf(" %s[%zd] %s%p->%p\n", type, (size_t)fd, pad, (void*)h, (void*)h->data); } static void wait_empty_func(uv_timer_t *t) @@ -63,9 +63,12 @@ static void wait_empty_func(uv_timer_t *t) if (!uv_loop_alive(t->loop)) return; jl_safe_printf("\n[pid %zd] waiting for IO to finish:\n" - " TYPE[FD/PID] @UV_HANDLE_T->DATA\n", + " Handle type uv_handle_t->data\n", (size_t)uv_os_getpid()); uv_walk(jl_io_loop, walk_print_cb, NULL); + if (jl_generating_output() && jl_options.incremental) { + jl_safe_printf("This means that a package has started a background task or event source that has not finished running. For precompilation to complete successfully, the event source needs to be closed explicitly. See the developer documentation on fixing precompilation hangs for more help.\n"); + } jl_gc_collect(JL_GC_FULL); }