From d4a5f2d6b478616b1692c78c5939e96423a79b0d Mon Sep 17 00:00:00 2001 From: TFUsers <25044281+TFUsers@users.noreply.github.com> Date: Mon, 9 Nov 2020 08:46:14 -0800 Subject: [PATCH] Add SiLU (#19497) * add silu activation * Update activations.md * add activation images * update activation docs * add silu test * add silu test --- .../python_docs/python/api/gluon/nn/index.rst | 2 ++ .../gluon/blocks/activations/activations.md | 34 +++++++++++++----- .../gluon/blocks/activations/images/gelu.png | Bin 0 -> 10289 bytes .../gluon/blocks/activations/images/silu.png | Bin 0 -> 10155 bytes python/mxnet/gluon/nn/activations.py | 33 +++++++++++++++-- tests/python/unittest/test_gluon.py | 7 ++++ tests/python/unittest/test_numpy_gluon.py | 7 ++++ 7 files changed, 72 insertions(+), 11 deletions(-) create mode 100644 docs/python_docs/python/tutorials/packages/gluon/blocks/activations/images/gelu.png create mode 100644 docs/python_docs/python/tutorials/packages/gluon/blocks/activations/images/silu.png diff --git a/docs/python_docs/python/api/gluon/nn/index.rst b/docs/python_docs/python/api/gluon/nn/index.rst index 009464724ba0..7aeacd87cc06 100644 --- a/docs/python_docs/python/api/gluon/nn/index.rst +++ b/docs/python_docs/python/api/gluon/nn/index.rst @@ -147,6 +147,8 @@ Advanced Activation Layers nn.ELU nn.SELU nn.Swish + nn.SiLU + nn.GELU API Reference ------------- diff --git a/docs/python_docs/python/tutorials/packages/gluon/blocks/activations/activations.md b/docs/python_docs/python/tutorials/packages/gluon/blocks/activations/activations.md index e5ba40353a9f..13a4830b55fa 100644 --- a/docs/python_docs/python/tutorials/packages/gluon/blocks/activations/activations.md +++ b/docs/python_docs/python/tutorials/packages/gluon/blocks/activations/activations.md @@ -19,7 +19,7 @@ Deep neural networks are a way to express a nonlinear function with lots of parameters from input data to outputs. 
The nonlinearities that allow neural networks to capture complex patterns in data are referred to as activation functions. Over the course of the development of neural networks, several nonlinear activation functions have been introduced to make gradient-based deep learning tractable. -If you are looking to answer the question, 'which activation function should I use for my neural network model?', you should probably go with *ReLU*. Unless you're trying to implement something like a gating mechanism, like in LSTMs or GRU cells, then you should opt for sigmoid and/or tanh in those cells. However, if you have a working model architecture and you're trying to improve its performance by swapping out activation functions or treating the activation function as a hyperparameter, then you may want to try hand-designed activations like SELU or a function discovered by reinforcement learning and exhaustive search like Swish. This guide describes these activation functions and others implemented in MXNet in detail. +If you are looking to answer the question, 'which activation function should I use for my neural network model?', you should probably go with *ReLU*. Unless you're trying to implement something like a gating mechanism, like in LSTMs or GRU cells, then you should opt for sigmoid and/or tanh in those cells. However, if you have a working model architecture and you're trying to improve its performance by swapping out activation functions or treating the activation function as a hyperparameter, then you may want to try hand-designed activations like SELU, SiLU, or GELU. This guide describes these activation functions and others implemented in MXNet in detail. ## Visualizing Activations In order to compare the various activation functions and to understand the nuances of their differences we have a snippet of code to plot the activation functions (used in the forward pass) and their gradients (used in the backward pass). 
@@ -237,24 +237,40 @@ visualize_activation(mx.gluon.nn.SELU()) ![selu activation and gradient](images/selu.png) -### Swish -Swish is an activation function that attempts to address the shortcomings of ReLU by combining ideas from ReLU and sigmoid. Swish was discovered by searching the space of activation functions using a combination of exhaustive and reinforcement learning-based search and was introduced in the paper by [Ramchandran et al](https://arxiv.org/pdf/1710.05941.pdf). +### SiLU +The SiLU is an activation function that attempts to address the shortcomings of ReLU by combining ideas from ReLU and sigmoid. The SiLU serves as a smooth approximation to the ReLU and was originally introduced in [Hendrycks et al](https://arxiv.org/abs/1606.08415). -The swish function is given as +The silu function is given as -$$ swish(x) = x\cdot\sigma(\beta x)$$ +$$ silu(x) = x\cdot\sigma(x)$$ -where $\sigma$ is the sigmoid activation function $\sigma(x) = \frac{1}{1 + e^{-x}}$ described above and $\beta$ is a hyperparameter set to 1 by default in MXNet. +where $\sigma$ is the sigmoid activation function $\sigma(x) = \frac{1}{1 + e^{-x}}$ described above. ```{.python .input} -visualize_activation(mx.gluon.nn.Swish()) +visualize_activation(mx.gluon.nn.SiLU()) ``` -![swish activation and gradient](images/swish.png) +![silu activation and gradient](images/silu.png) +### GELU +The GELU is a smooth approximation to the ReLU and was introduced in [Hendrycks et al](https://arxiv.org/abs/1606.08415). It is a common activation function in architectures such as Transformers, BERT, and GPT. +The gelu function is given as + +$$ gelu(x) = x\cdot\Phi(x),$$ + +whereas the ReLU can be written as $x\cdot\mathbf{1}(x>0)$, so $\Phi(x)$ serves as a smooth approximation to the ReLU's indicator function. + +Note $\Phi(x) = \frac{1}{\sqrt{2 \pi}} \int_{-\infty}^{x} \exp\left\{-\frac{t^2}{2}\right\} dt$ is the standard normal cumulative distribution function. 
+ + +```{.python .input} +visualize_activation(mx.gluon.nn.GELU()) +``` + +![gelu activation and gradient](images/gelu.png) ## Summary @@ -263,7 +279,7 @@ visualize_activation(mx.gluon.nn.Swish()) * Sigmoids like the logistic (sigmoid) function and tanh where the first kinds of activation functions used in neural networks. They have since fallen out of use because of their tendency to saturate and have vanishing gradients. * Rectifiers like ReLU do not saturate like the Sigmoids and so address the vanishing gradient problem making them the de facto activation functions. ReLU however is still plagued by the dying ReLU problem. * LeakyReLU and PReLU are two similar approaches to improve ReLU and address the dying ReLU by introducing a parameter $\alpha$ (learned in PReLU) that leaks to the gradient of negative inputs -* MXNet also implements custom state-of-the-art activations like ELU, SELU and Swish. +* MXNet also implements custom state-of-the-art activations like ELU, SELU, SiLU, and GELU. diff --git a/docs/python_docs/python/tutorials/packages/gluon/blocks/activations/images/gelu.png b/docs/python_docs/python/tutorials/packages/gluon/blocks/activations/images/gelu.png new file mode 100644 index 0000000000000000000000000000000000000000..26a2588e223207094e4d5e4d5c4bdad8a7cf720e GIT binary patch literal 10289 zcmZX)byQVfv<3=-grrCbQo@gJkPbnlLqNJ4=}svLX^?J^?(XjHZV)(h*P(ffd+&Jf zkH;9mIs5Fh_FQw#_~!g(&^K8zv^NBAU|?X-B*aA&U|?V=!0Q$y1n`$&2WATVgSQu! 
z_>KgAej*tLg7?Ta;%fFVFeq3rzp#@lo)6$6m%~?82SsaR2WLGyBN$6P2OA4(2a6y2 zq)tY5_CKtx*ce$DndwPQ9UN?Un3(>*&l#=lOqgnpe&fNwkitla2z_@+Ia+bmjDb8o zpUu%979j=x3Pz#L^rqKu*ipb;upMBZ;>&fZo^q2-_3Rtu;3XPrU9h7aRIXdLn;RUK zq{CURW5TAZ&NMwwWpRt6e-^as84H#x>)LC`+^(_?c%K2js6;pu!s{pXG66{64&=n_G$$~$kw5W_y?s%ZAT%!Kf#)dgdEW&sy zSBCGj9WK6^g^bK6JDVaZD(dv&qDuQpI1m9+auQMLbn+vuA?uwl4`rIwms4HH#ZJ0cS{Naxa#2a$ zs$0m$#>U`|^QtRVLqo%-Qh6kE^#)$wx;DYVmI)z>%O4t$BM4Wvx`Qie%jKjL>IbfJ zbxSQ`@q^ituBD}>-cN7z7&9rUThyURYRwM?};)OY7_u zMnJ{?{QWz&R+EzvWH2^X=YAuE2?+y(>f^^>wwt|YLriVuW;3*DZu>MJ$;r8$4)Nbn zO6%@T<`nC+rO9WBDp@tTUKk&@-dn-s%BD(6OQY)P=?y2az`nSJYBiqDYFNWYMnwsmno>to$x+hNhj#cPDk>-_OnK>(mC};e z`S~$LA)h*I`yyuAPODfKXSJPa6dw1?Aac1B?)R~3$?f2#4cGG?Ch5pYO6o6E8XYdy zgq)w-WoBm5QtiFN!n#@a#Xv$v)?2EzluqUdlS$@KYCr&H+xm28&d1L`Fgltm7D0?k z#1rF-h{0;VEnRE15?ofsv@@F8)z|m?@0Z_LBqU?~f5@4Bn2cu#Nhh-UXJ&p}US7W0 zFDk1)X}yO@;I^j}5D-X^rUaU72by-hoDg%p7~#DA_nYMO>Z&EXWNXV%;PFzi)^aI8 zA-KMt>+bqUDwc|xC^OZ&@&}DlJMn*=MSn_aicAmEoyyF(=>L4^OE>5`!BM1BW3EFnMmzO?n zR%2*rXmfq2#$mbG4dkcm>g?;=X*}A!ayzcyL{~1;9syU?nw%Jb*S*p`+vuU7q*Tz> z9tF?R1qK=$>5N2pLRD;XFoIK*%1hW@w@= zbJQGtxV;z{9Q?@5{T>k!@t3bJ2rDx@Fo&)8$D3eNFZ!RKR|WBd%XS0ay7>%&*Xb}r zBAQY4KW0LVR7lTXz00F-7cD1f#tKa;W!lBs zEd;(DpY`;}%;!p9w@*z)1C^8;4q&yPBQ~3irP0-5prca)A-r>>;3%yTeGIb^YO02en(c|ZZ~T%!utNtyVG_jWSZ-z~DWP1NGatZ&67EQb1H|Zf=Gd>Nh@KZV(X>!NkRN8Zf+k z{St~?pC6$OSF;*)R3S{9PKT->d<6yt={-H%I_ys)dzY6p=gOtauC;mu1qQAUP^H7` zUhYmbKiwTT9L$Cp^o46p#>wW93kalL9n7hu4S#t1R;bQu1qlVkV52+e@^GQ*{1F)$ z8TCDDww=QLul#&!p!m^0aD9hu4$EDfH(1o*et$GuI zIm5Wq?l_t7moFvCs*c#WxEsgEvbMG?9(R{yAXbC5nr6R~O#7#&<_pShW806pYnX2A z_s>naZ9vRg-c=C}XLG2EzD2|!?$%{K1uiU!kE~~Oi$IRy(F#}ftKpq~34!Q%`>P)v zzg}N63I!q*EgNqCdELbIN2?nT!!a+cEP3YM^qUWbUp_zE9MMJ}xmq(%FntV*uQxD4u=7f5HxWIhzd1E#EOmO`vQc0z8~eN5Fq zIo3$Dq0vT~x!=C6S{Met2F~NZ_xIytJ2FPfu+G9!|fsS4vB?c|PKE z+fk(QIO-qHS5W7aHu42w&g8^wfap4ZCx%{zejo|um=jc6PBJs8d%iPS9~L;MeLs#9 z(CjqAw4{Ig2Qn1@k_%fs?)!&_DcIR#g5FUnd#)@jBr<5!!bC<#cWn(s!@68Am?ZK# 
zGe+>8BSr9E6za+LYug8O6&(_CljqBqT4eqX8>zp0z*S7{`=2gyTwiE;QsA~ogneRi*oSs7en)`%U1Z!R1|rA z98&*CTUPX(lXLPMopdINz`Uf#o?sl{tfvbm=@qUQ+k<5*rUh6z*eA^wvPOYg<=_%s z95xLTQ=65B@N%&rE6UCNL#l)6c1Oy@pWW)(wvRHtiC-$gnk{ZuFG($7#CSA~-(8|# zvos^adwV3=fSn*w(908Y!0&!-IFb1kCcfFTV#&HK6}sj{k~M+U{)b$QnwGZl80yuM zBkNcH=i;l8^aDLI6(WN@RrkiWVLFB!h&2`!5B-5}$&uc?$&`v`e93L~_4R`Sj8K3j z-r?c(WeEAw#?!zKK?c{{PxwJ(8WIMft=CgHpC@m`+{_ty# zT#^O|Yvbfp>rB%bP>CYtx?4rDoO1b-T>t@3EVV_d(6A=LLP$vH1sul48kJD`qjB-_ zst}vf5D55AbjL77575vWoyBM%ofc|5zgJ_3{P5v}=~Ql5P0~AI)kKA0FQzpyyM5Qy zCc)rSQfv2k2Y&?n4^fyKcdO55_(RS{r&}55%Q&mcs}Rjt_$bV<_JBu|gqnZO0ixCG zf0h=Pno`Use?gW@j#rvghzIx#dQsiy{_`u3g3CL1*(h2Osl7Cw?LVc}xn7C8)NMd` z$dyU9adz%AWuGikVG2NfFC0P4f2fs?nTb&OMl$ii6>GH=ogzGwx88-`msqZoZu1d*Fd3s5OiW&42m5^{1T z_t!^iabIX|Y1sof zn2^JwT#R2hkl^kgR9aihIgm{2m5asaz?Pu`EQZg@%D4b|!^FhIlmq9Nnmv6hJdNRH zhaXiihxHFBFRt8=mEb43uA!GUTUEx4l)s(E!=pPJ^P`; z2oipL-y(xYjo~jCn8d`RrCLpgS|OO3NuiW%7)Z^bLfA!Gpf=FKk@)>>wr`Rgp zyVe!-92V9qG{^kxV5FcC$WvOpJUs2nJ3z-$`E#)Zz}Lm5Y;}-ZMTj^JW_$*v@ccQT z)y~{Cg?i!lex{_`k8y*YM^uTkWm+xjrX`IOK*Frrbgn*>BJ}aLERtvZ<%w0uydRLz zalBv|D|bXLH#UOKI?M_`&wm*;@e5XFI19dv^eyXq--+Q&KhtJSg5DowoL_)4c|>S> zo;6w{Jl?zhhR9LU`wPsViYUuK}XaZ^E%L*H+29~}X;=m<#k@?sX;Nh3uE z=(b|2FkUBXtKiB0FHdeCNYQ_4wV@(=BEC8|Bb)sQutF+MY#iHd2t~}V?z4${?oX=) zT0k*};xs*qQcxTADsSgrhb+gbfT~(~M{!bQJu#98mlVhQ(bM5b*~>lB`26_KSe>%9 zPk%-Z);?^Po^DGYugtDOrjdcx=Y>SAkR#}qzw!z?8;2oTN178V{RV1#lV|8M;kNutuS?^0oZMxw^ID77udEb#9 z>@{QF=pXP*(d%r;`2)Z>=x3JfLqXqTaY-2%!k!-Q-w_ZVZKIFyoIku z^y!P`bKRvh44*HyM}mTbU$u92`Hqi&V`XJMR8PP}>mght);l2KZa$A^bU?nWbmn$k z1g;D!zxf(-JX>4a_~x{P1aS~KVARyqY71lo-K@HvrhcAovK&X4_v9zUz3gR*I%nFS z%HQAr@@Od@)Gcg3Z%)mvu;0D=qM#5Z^9DP!^^UwM(tUMBSey-`HE9x#s%3QbzZut9 z_k~bVa&pm7U4VS%(8$TU!*Jqb*(^O-N6~p!y+|cHcPaNe?0V0>VDO88+Eh;9HoL_f zD|6M|69OAg;adCQkK^)w*HnjjYA;l_;)%gaOD(CE~6cvO+;j!yh zpp(_8&sdoO=&7SSx3P%3Gpq4pRwyrWCIQ>|^i5TLwClooV{fDVc?FN=Li7^_aLZ3L zIXBRAkL)e%v+G@}8&|G{rffGk;5}yQ<&P9a*C`tyYu4vzDZ0l`kperGN4sY`cd@+^ 
za{GOAZbRO3=x7PLN14HIP(`0wb*W@DiChj^`$gRs4i-uK$dKh;@Xw*q03mUlDlLfmvbM1-P^*YKSp&1z?rz* zU59tW4>ikMzO2CWV)pxuUr=|q#BRKGUQ^|!T72<#nr$No*-&zV%2Onj6If@f|27TRxbm{c3jl2s;8E%L!!ER<;wK34sG(badurRH0<`Cyf?hZ5% z7BPV2`Xkak3d3{6@Z!X(d78|>w5iSa$Ug(yv^rzLTNzO{EiT!RfC-sYmF zSxglKK+~Jiyw0iAk_IYT8?sT$`ctnVbye#>zyktS)|GB7wz%^Q7e`bsK9ZdDYiS%XK#w74B^Bf5@{4Cvn++{J&P=<2`h#iIS`^j~OLO z(3c-&ec5)LTWG0uoF8`a3ULiRV?vjej`-P-cV}qyY%jZ>@b_EQ&d#1CJM+oAyOU2_ z8oy|m;LfM$j^)&d)A^Ca+TaY%jE)W%$O}TmRJDb0{wkf#&)`*~xeLaK(&TvF?NKeS zth^WWe2{q^6&fj$8s&=b>56#G0INs?4-GiI&$#$zbKSUc^bB2jyZzHEW-h?l$N{Qc zTc$!o&25cb@}L+Q?Dx?l_Z+Bf8dPhs*?i0VYIT8?24>f}^m168sIogboKO*AoUc+=hqCN0v z+DE5H7@u9|W}!yB*=n7SdSL&jDTfcFoLE>{N&#BcbBWFfMNg0)pfCPY- zGES=^S@CcrqG0ozCBe#20ylv-{3LY`K01Agd~gv=i!y<2yk!qLQY!+h;#n1k9rhPB z!|p4A3l@n@thl}vTbUXxF@%h6E%?%qxB<85306*Hj$aKAi(>@|>uok=!zx~yw~PLL z#x@Q`lXV_bJ~5GzpM`~A>E8BJ`T_HD$Lp#a6PCvnk@NN(Q@yrX@ty$90)Ji_mZY1t zd1_4}!g+M#lIt<yMhZeZ&79A-&gm0`iS!@p7)RAW8P zr9JxopLSt!88uB$c>L^Ge8RzCUDsD1wBcAVIwGHy@BOB?rtO=n z?_eHz@6K+#F<sfI{D@M>3mu+$py#bvudlTNLR#$9D>cGTf)s5jsT zQ1I~J5H`V!DQ2rP!GGLVye65Jj+4#uN%jpY@H#MEVPQtDxo ze$VAMsLT>m&|LAIgzkyy+66h_1KS@e;Y_qwSXubIqJJ7OZ z<1B(gheKmgqQ_MFaX%N1PPfBjeF~k@Q*o2|?n)JXtYltp4c+tMAY4|NR$<;3Y@j4n zr*pDReN-&Dytnef94*@rkHyiGwWHOl+1`uulIJ$IbZje+%3>$pS$Agb?}{a}m223T z^cWq>`S=bmOv1fxs3uD`Y$P{@{C&(X8K$nWDl!XvR4MotgRmre&O}$ke~b$D!HJwS z;j=PDwa4N4$rO%JE69S(eVPE^cRh~)K2urN{|L6Ny$~FP!!qwAs>)tgeWD+0LPe*sQVkEFC>id%LAgC z6(B1R7?Q+&NAy$Mc3?kfO_d>3781w2owY{I$aV(#Vr#dTDJuL*;`>^7LtM0Ba&3*@ zA34Gz_T(hR?iP{AEZssZZ{}2~ zlcBO-og82*tJPcVp_<9jL*FGGf8wcPJ6fdnF|n!y4r?mnQMYZmVSN<^8DIxU%-zc^v86o$82y9-mMm@^jc>iEk{PPwL%bi?6)WI`MDX@y>D-kW6Kw z-8?IEy4d_f|AM5YzH}MZh=ERIsopcI>V=pn8FB}VP?MGuZB`Jit;{z=6+aD0z_MhrdZ0L=3BqOl(;i+BgDOt8#s4%TiNS!5 zAa0Q5JOGVwcfNxBXsK?eW=^}xM5dEiTJ#2@<-&v>ZXF=R)S<)qa(vs^Y&>Twpma}o zN_|khH)TR5w|&j;Jz%&2n3V0NlU8zS>OjB<1fUa10)D1sGHAk7gl62d#a9$ z|L0UhtZ~^{2nWh|eO*&F?M5nTr@fGaNjSjnj8{U^ecobYcLQSSf7>iUK|wHvMn*~n z(Ry#6MAG;}g#X5L7Sp5D(7mQsx@{CzJZsF{kn$2xQC)SOIH2}tO=hNJ8`QK_G>%z- 
z9}HD^6Rjq1g^=|&H0N7*IO}ChR#(0Qww8;gtxf|n_ zaasPpL+gmsp^6XcxAo@(e)v`g)5EluAptTNI9|)~4G&%qetz&+SXeNC&nQr!a_+$> zv+|fV;?S5qpkeT<6sBg#uP<}av?C546ShUiHp&Vf)uSHF$*&KL+RzVhiwk%DKOAk zGu|}ts(Do_DZ}@9dV7TYgI?*<`nKcaUw(85u9L(BlrjkbO#^Z79)7YD>Nty~MVt&j z+LqWjmCKF?1;}Ba6}A$ochm$hu8GSLU-(}%!=vCbnJR=X|>tDsEwk^s}?P*W7X73{h_|{OA4+h zxNM6Gzesb%p}W~X&@Z3sl?0MfN9puN(x&L=J)_<&e7$k!IxFMlIa@C&v2~0|t@LsB=QI91onT(NQxT73a)Sqx9GlxuZ?iRh7vQWg%4{0i;rm z*>kM1zOgtt8Qp~AVP>!NPeG+C7S1Z=@Ns}+6)rT&@&D$68`=|hn_ekA)S?h~OB{xt zGx4YB>SND(?|{CT!a3R1Pvipbp6qSxXf0JYiRiRZw`MJ(Glz*y$cA@b6&yMbgF2ci zoj@Z_Y^_)oSqkhWk`_^mIVT3cII z4O+huOciXK7FCM@#S9vX;30q9N)x6U{@aSh2MxO@Yw8^fPcjH4oAK-I;Fbg{SsuO= zqEjcw?9_B#9~n&~vkl>cTq_5we?fUEH?(Gn@CtB{Oxq@_>FElDx_HgLZKp9UVYq6;;cCrYvaeDjf})5!gS6-MU*JKF&3r zXf234I8>K3Mm-&TTbpcqI4lwd^e)*xDrgRfr2cjP=Z~jdu4FPXF;TlOc~|zuL+bgw zs!eSsX&P5JaFLW`;s1SlxfE#&n949^C#bBdvRpi|ALAEuap3~}X;fmq1VBVB6rbd! zxdqFn^1KCo+;Y9H{*jRywKW((u94Bxqn0R}PxV|Ky24HZW^3c*L@u6D%VLq~v${F~ zC?=@@;{{08?cqd&sV6i9xRoa7m!S!8$#^(HZ|bSTuBFdvrSUBZiPr_;%wPxNobCCR z#3wKuGE=BT7Ym4jsn(10b6S$IL^d&*MAjZ-4=*q_Flfq7n-k-9x0g3FB`0WE)f%g? 
zzuw`yfBLVh(0Z-a42*Sf>{T^4Yt~pUDHHKJNyo;;6#`z*alIWj`LZNNLGbA&OHx)= z8O&jD99;YY+bjSB2stqdfS;zNpyAE0 zaKQ{rZ|s*g9XIY-S_lZcZ3Iyzd@_&IVl3X>yWP<@`ug=NM~U9)ddKcn+|{Yu<*tyM zTjR(b0vHj>0jx5|(ZvQzBo7advbOiM^4;a0Z$w1I$X%1_|F02fG214Z)v>Ztxu5?( z#KPhwl%JnJwZ?po^~Yp3t)$#G z6tLRNhW(gLmzGj0O)v!v&(sIOfb(=ZTKuZ1nId!lDN$NPR1}=t7@)E&7N0jG1V@Y2 zs>aQVa9NafI-SQ{+@1ZAk13wp&KAE3Fp;TZTXC#?Z<%dQQ;$&|aVcFdsAW*@Cg9v1o8f+hQR%@-- zWIuoYTmtoKxIbx22PZL9=;M&pD){_hKU=KU1xP*}I4=fI}<+OX*d$f0xDTc~fQ1EzgeaA1w@mNP6j^HbVF*5a z%aJ$|#SsS+B`Ux|3&BhVt$=`bPF~(p@d-G%f{YAOoVRcPe*E~+qVcyJO->w>PU$N+ zxY(?$taB(-1#pSA5^dmHN@QeYg_cXT`>`t86%y%qp~U>^ui@dTTeQo-?K6EOUKUlY zUQf3suk!&xqx>`sNM-Y?C+Oiq7UHjQ7Dve3ca`71sUr{{X7nl?4C* literal 0 HcmV?d00001 diff --git a/docs/python_docs/python/tutorials/packages/gluon/blocks/activations/images/silu.png b/docs/python_docs/python/tutorials/packages/gluon/blocks/activations/images/silu.png new file mode 100644 index 0000000000000000000000000000000000000000..6060fdde3c2485a857eab1ced937ad6ba4dfe440 GIT binary patch literal 10155 zcmYLP1yoi+w*^r^xFx&U?nX+wkp}6K?rv!i>F$#5&JW-4{(rsaTFA%! z?wvbx=FC3l?0v!%!FZi1G3RJ-OoZ1TJzpi)uJ4+nGAM8916i*%&z6TiZEXTNu7~HF0#Z zu(M@lU}j*VdvEUSY|q2U`2XKC*g2Xp)*px9LP5QUlKd>B;+}rI;^wY0GkbS_O3rGL z4=rh)6NE2}ASpqn1cCO?_Ahc4{fzc`{86h3CtPoQID%}@yHBTASp6K~RW~;DoBUt} zVP^Q(apOu7cFLotF)%qn-vcek4=?#VM(dnrf5>PY)ta4pvxVnUt0);8lRw^`Rvy_3 z!z(g`5S-#giSan^efswdu}O?U%Y|(%em|1N8T0GcC+%&rz@iN)L?tOHgu}T?vp*@! 
z4S&Sp6$30PSKPM~)l>Mtax^zLn{+U7a3syoYd}p-PTGFetDK9EixXBcWNX%%<`))f)6JG?*4nnL6)P8hB%-3B3ELh{ zPF`K>^c9S!lx-T&RFEFBm|tuMQJp6LjxGoK(^thX{t)M&)X7ks|U zlNxBQe=jN~#^U?@^!2waytMQv-FkruocW1zL`)`1UsukB?(3PK@3(h%rz-UYg+)Xx z);jo0)yiK@BPmwnXh$t??bqqc9^^$u9l+jr{o3EiHKxFP>A*xYN?VLbYHz^ zWQ^I({2IN!{zX_=SfkyOUHPX>PF2nR1y@w)>dAq060|Bo?zsH0U~xwtWxz# zO0Xyf-4--rVmWtrcMIq=788V}Mms7xy2xgy-5Q&vhRKP`)YMcYJSM;I-_Z;W4Y|DT zn4qQ#<;|}4CY*uk{QRJ3zI2x$NJmC$V}x|&qw%u5LQO6~rk_}aP$Y7073LB0W*N;I zHCXKp)rkk3+&%S25m?MukpfSId(X@Jm}%sCvYfio>XtWQu2E->UTZeClNTef3AtSx z7#@!JARY~6zx^8<78$?S{|zFS``Nn9z&jRWcwnB*SaIT<{QN+DUr6GZz$22V;@x(# zzP*FP#^0fMBqE*!$m8whMhFRu*=YKt@-9J$p-dV_6bcc~*;b5T4e+>3kDb)X6e_v& zFNd?`e|L5!f65Z{$B-}@MF;lLgm)*9pg27lqC{jJOZZiOO^h*#^*2WB>gsB{S+r`q z-$}bVUCRR2TfSsgR#L*_b7#V3(8W@xH%rqmyD-o zU@+b4k8(a<#8y{Vuc@s~<#Hf{LnZE=n27D|?QPwuGa5iqtJFjOC4$QTSxijK;{157 zl4Nvrl#7>lYj<~`Ksx#C_O|k12{%``W8NW{9d7JW>vZ zB?Ch=G6BcgZRfv?)%LHfE(g;r`lU0L%&8T=L>`6J+>D)1EWVFRYwkzY1M>|w+WQt6 zxzvdd2rGSkK_@G%wH8y9YUNse<2fSGDZFkB*9S9O;B$%$rH+oyiDy|2j#*PuL~X6p zzEN~^G#*kg!Y=h^)hCCK8K^!2LRr}lZPR%d8S{-L9NrSUV_5?uBT?ud#LgD2Djn8; zedFzD&};Y9>F`Pgs1P0zae7y7=<3Q5h0p$K{jjo=!Mn}vbd^q{D*Ng=kyagQeBXKR z`hg{e-FOhKI0|Vd{3s~k!OIQ^^1$Gl4c70nGk@RT z-L3EMdkoxu3@peu#b$Bv;2ZI_WZvCZsEu?g<#@9{82jO~s3-@SoSYo%l@@G; z!nwe}zyw#zwIVS0r{~AhtD^->Dk>@{N|}^jbNarrGBQYEVPVrHs)#QqsZNU?JozVs z_qHMZ=5$S7LINHf*XCjc1Os|e&ku|YVIp=+=l6r~%NZk1H3iIbuWSZlNRkVrPhc*% z!@d?!BUJkOrkrjm-QBXrCK5ksWLi!MV+d;uOs<&S3oryp{;N#Xj6jzV!Vp&J?#P!h zeR7TrS$sJE{m;teW=s0z@?b$a86_RBpRbMj4hl-?q{LBcF~`>TK(r)fYbF*S3&Q(8 z*rcei{^1!G7~6vPZ%I!q`{f6?>=`0_kr4}B_)#PJVwE{aKR2r#^A23_@8ZqT`}vR} z)8Usc;*wi22s(1=dtYN?Qpvskve6ynbveSR$}?JC3Jq^S0~cTGemjuoY_Z)#fSSy& zHC?!$Hj8rkqiW-Jns1gzq zDo{6rzPlJ^v{-3jUukhM+8xa(h>-^y46rq!e}AeF6&)Sjd4DnpOdRTfg#sXbE*Smw z>(^%hkzg7b<;en@_oFEFw0!(%-%!vIq6-~s$*F}|rQ%GEDX(v$UlP~)FkL~;qUE*0 z#D?OjSUoO`gHecncLyQbdD@oK)f|X zmDOhBfkFDmW;S^1h(CU4xhtp6?ypIG)6CBxEJDIR0FgYv9xQZt^Xlo{K1(mxTj9#{ zUt|5UDHK;!ROHl^Gk<=%4-5{*0ETpQWTBGJG#<<32N+MJudkn;o(=%+%!c{m;^GdV 
zr~5&1Av_|Y(Q=c6PLqAk)jc!}EI5jvT3wH&a6>V>Uz8wl$J&S@Sl-?XT|hWnha%Id;74?JhH8xQa-{I zN>EWk;I#G6G67-dOP;pt(Cb}3wKSYKiX;+noM?2Bj{Dy%*d}IV$bw|T!O3Z>Hw^P= zwVl9kZ_m8JW(i;@tY>@XX~*M9gEv1vzr|{s$G%f|uKnJ4j^mn@$8bEnqH5|r1qfc^ z8Cr65csS9c6A#-ti&*5+bb=Sas%Pf|B$(LPeZbx)?)dYC@+3&GFr=iUZI36d0JEXM z#=-oQ&l-h6<@*He^%fPC(dnPba1wA|eKdT0DFp>ItJyM|+xDbJzA!XZ1EwD-b!3@! z%Z363=-(gH*3RMz7hym{#SHd+ySsyMXWb=c6K<7de(;tflRH);tXft-gh`Fy+8Zl^WP_y zN+&b89gm@J`rOQI^XdINK{JSIPrWH}snFLTuDu~0YW#E~D?s9q_hbmOmE6Bk=JfYp zaL86!`BNl}6RWn6qk{wKhYuppd&dd6)W*ifDto^vb03r|3tR)0Ej+w<>R^!Db!{>* zzVs_|fD92Nc@omntaGG6sm)DIgD#7&q4XiKz49TWmk&KQ?o|b2D{fU7^hGc*FRtR? zzziGkjVjA4?j5VC%E2d|jFX?S!@}^7SZ*BgsfRIBtm84~D_ZkB8ENK1M96I*W-qU- zOdMW2gPftm`!OlH5M5XW)uOpiM#I%Fc(uaos?H=SryW;Ip^#H2ZKz`I5Zr9qyqAX7 z?SmIdsi)`QSN4iuP-f4(+Qj@+)S|Fq!knCr4oOf@(DV90)Y_{ME=+C#V*X(txNYB5UZ1P>z)No+nQ2v5EtleTHn;?;0NgLjvlY^*z zu!+I}DZ;k9>|0lZ+-|h(U&a>7#I>NC@9E!we3C{Q!WVYHpU+V21J6M3B{g&$fyINW{+Jl+Y|mRA2({p1R^HP zdyQGnucqXuI(>b$cPRs#;KFmyZhSE$+R}&B$3!>umquo3RuXZhx9K*wwrnyY(7Gd& zP6UR1H9{abl@tPZa;3$Nm97z!7v)f~5~J3*@TgM0 zkqmv*iCrCxkVak4o~W|_9Jqq&Ll@(0iReJ0j1}bOBeQ-NMA$(%6Wi}{uue)YTOYcZ7?Z9zdtcMHoL#fN+Th>T!>O=%UR{bRdHbd58mJbvq;LIp;DB%B<(+Li~^rBv~K6t2QDNK!KD5?>C0by zHN>AHwh}itvePa!QUk_mim`@VvRpQ%8!I#k_T-+7f!zV??e!+(uJuz_BhHuQM}20_ z_O5S33&A2MJ%Is%9W#mOS3b3u0Lv@&!bi84B`9l$KK{C@7w3-qG18e8mT$451?Fl9 zmKK=t=&H{`drQHZ^A*vvzSQi{nNA8J4$YO0ls(ODj{NM)aLbeYDF?WH6D-Z3um#rFb(wqLYLYB~1 z^Bc;is_}v4=2O=&r|#UH$Y8qhSy+A+gUcUZF2_!n2~$=^7S?CM)n>w1B)z=gjU+s; zBFGjDEC^xqy3IaxJ2;hi=(e$+5+NECu0HPLg(%f%a*_E%>AL}=VFDNFMk$MyHdx4X ziaf;sly!4$lMy`vvXt)rX#K&BW0Dv&Oofm+l8Wd5j!y8KVfu1&l8aB**^7sbu-s_m znQ9;EdwRM|3^6}E5)wHzb*RVX&H@i3X$e1Asc#4W=YvGyXFGSU4q|pAVBk6^t@5N8 zfrrquGkZrTr?Ak_Z1qaLI;*+S+55MOHuO&rth5+?L(6}zpE*j64xNwuVLk=GBEzGi z!u{&$2>|u8lCrYRj})|-h0)H*)Y^UdhY+^)X1KHk9->xgwM9{#W2Mn`RsZ76mgHVS5g?B^*7Dr6-Cs2%09k0 zv)G!x_I2je)7o8NA(mE?{GJX>6<(k4vWFbn5f&)xmm<`0S{CkX&IxF zozJkY=Dnf#t_xPu-vPd1XN(N)Zg0bxD~|Wx8B@boKSm;|GxtmOMOSzjrt 
zhD0#?A0q!6lSlz9VVe479$n$>yLW(n$s@5I{a!@5!{0Ok6->sIR3sR@dDtl1 ze^^O-Tso=Kd7Ax3oQR7jAP>_#W4b&KlS?Hlto>yG%iDdVRS5I9igIs{!`KG5wHziy zMUs#YQBUS6}A~ONijY3 z4c{DH@|L}{nf-JAdPbDg1r@+;%_Jq|e5Ut&5FW?XS=-T7u}j9%@m*HXxYv)%yF?Be zK2Gv?xxOhM<2;3p>ZTMKtB7ZfiK<|5pZ_VxE;5D*}K z($dn}j>GKM!!qqErazlAy|ZFH)%QrD?-yu0txuBsMxQeTZ@wsL0~!1@STbc^GObl+ zFNDwQN(V}wH}6?kY9_x0R;6~+-8sP!LQvBe5h(ea+-v8(>Ga?A;9(vQ41UX6;*tL5 zHB4DTD^jf$h*F`T{h5;Ao&8(N`FMk1S`Ea*_g?7X^~`Sh0PX4kDV;PzT#+6FQNSIA zy?Ma{eTvzKP!IG)WEEa@c6<~{Gv@mx-N1*htZw#rC@7AWri^t^A|WUypq2z|;MVEs zSmZla_viavn~XFxY@_L(YW6BK()=SBIHI~^xQ!2_*#oj$W^t^Dc$e1#Ui|5rS7kc% zy{qQXYGoP*hkSuWZT>Q$j|ex<1dd}OD@kW(bnx$x>b-2gdbobeRMbBawNugH{wT%Z zVH5Hgti{~TSA4~II)Sy&1c*&k1w2jsb8331T(t^fj1QJ3VLY|S$w)zVl!--dpuM}b zu}ky4jo={U*RP=QHt9sv#z$vZMz7i|oQ@MZ&xQ{SJn!z@1woQ#{oxkF+C!d5T4flH zI0KbPY7ms7ZqC(cKgGOF$Hi1eLn8nT42)i1`_TAL6<6Sp4ZWqKqN2nd z^(DWwJlA{TOV<>j)DkmTD+E(LRKSv0hEH2nO7^u`f%6=Dot1R!wP&>Jgm>Sk*S73T z=SvDJD38lZPG>Cy4AwSG^<`>p#^jTiR?peYcuAoqN`^a4a;*MQm+J0dIIf}j-mzuU z7Qzw;4|IiR;XG9ir=493L=R7xJOY&rmUgfp%&}fXw3`#L(R~$S*wY{ap0H!a(*4cu z%kbkthZ<#?w443$2fHiLszc@Z=W5TA*=yvzr*Kh*%G&)sQ9V6$`K`qYV1Z!I3mdI^ zkUXkH<^HNBnb{}SG$TD@-h-ePtxS_`Bpq`Pw+d3CZJv&7DiRed4-b(!1loY2A<@m2 z^XZnz1!CFx-rbb`wR5cmwWwmszeD{$E9nxTa(S|H=dzW%Fw@o@qN4rC0AZZxLzXL3 zW+Z{!b-s7ra{bcU=in!|D6h%j>1OsgBmX!RH;b-S5B8XvIVJ))IZu$|y zvQlHa_m#H}9C?1IS@5P3Dwq-dm^UH_aZS2Duuse+H~l4;UqoO_F~1x(RJrfHhzAA> z&`?QyBqK2N#zN0OK7(5sZZVGTWik#F<50Wgg&R4$3KRQ8Xv1IFl$81nt`RTG^S;_x zwsP9MN-L1BznYXUq0beeohkhVi>x3&^0cT$;bj|}$*E3a+7labg;X{$5Fm2y99ti0 z2_iXrV(zC&6c?LZ%tN6=BqFhH7)H(Q>7yT8xQ;1RM^L*gh<9Jt8X}MJ-)GFRr(os( z_<0t-h_Tp-H;LI*&*A(PhNt7p`RD;bstkg(YnBNho7|gAoF+lSSgqw{kd^;_<~1c| z`lCqurd7A)^~KaPk-;zF>nj7!|Q8Kja^U2yzw)?uTVN1Saw4%psXi&5_0H(Y_RA#a0;e4`%o; z1PCuzh>lBac|kyr^1;endQc<^|>i;^oSPcN0cIJGwyUEj*}88 zujq{NXj)-Mxugtk*Bnj*0o zsM`CtfnuJR^U-`PkPk7j zuvB*BVq;^E);c@8g&4N6Vw1FG)V^hwbz{^ZTp8mY41J_-en`nYBFG&6IIe8Smgqsp znE@kU8$InZv%ZBOFiPo_j(o?pUtBu1RZ5@VQQ_yg$fqNtju_bSDTBPEQ|o;Nc4V5Z 
z{#RL{M4wob7PUK_wyyGM9{7ptgV}Nv_h~pr-?J;YO0o4LgN}I|HAANA^F7g|6Q1GK zd>Ih)_-?Q>CP?v}yX3tn^R(*j+6?|e6%Zr`J6JM*wQx)ZxPnIh8Eu%&V z^85hkQ{$cF@0sc{o|!EpO$KS{>M~wh%>Zl$`xg>|v(gZLZq#9Oy^su1y7bu8vM$ zIyOJ2BqO>kr`y|)FXQR7FJIQrj?|O%!JAiPzDlZgnO^Z)EOKdBo9g>&|B*iCAEAWF zc8BrgWw?dXKvW~iTVn|oBR=Ej)FyHKf-;o+)*@@fp-DjB07BmQZ=yYSV`#*JsLpSi zO+z_!06Nlkg}c~JhpjW*mS>mtIp31CvZu=rWpBM#GOVuVri(R$$^^YK>qAE4zyAmj zgF&r7H|m9v-Jkt^yW`K$%U^`^rZ^W#4Aa7+Zw^t-6&=?p+^$KtH|R(>{E9P+fyeyU z*jB&4i}c2<{?8F8C|Jl#FF?!UW(h+^BLyrb$SSJn&smn_^3Qh*bfAPG?e1)Kq@b2d zp!I=|C{k1wuZu=uJ98}fHkUEbO&u=mnv;pRU>U2_7<9l<7#g2^dB@Kj^@Ooy^Z3xj zyQWlCVas3H*relpb3XIL4ybJI#*_~<3%wbw!pjS^gmOc<5_ZD7ML9;bJZ;z$Nvp>P z!!rg!eKSS=D(Lu8uy&kOZ7b>U*6SVvbK5xkcGMc`jog+SQloTVPE%!S5U(4L)t6-5 zE@^Uh>$|tZV?r?XDtG|>(!|aE!zMa;DF0=nEt7t%-Ioc28ZjQED}szWamvO}pANH0 zi(+vBL}L&sgurP(te?>Nl68Cjtt=1c({Xlh+A~)@9bJpWPQaU1?wa&3$i&cV^#zrD z4fZD7DV=r&YyPo{QMU~Oqpo#WGS!2f&eGV9R%wb ze)SjYV2Fn03)_m9>B)X>fvh(w;1nbdJ&9j)oRRmg4WDS)=b8e=R>)RP>y_8a+cTHW zEvs;3F@{|%CsOTWXLZET%yeOxg@VTGzsYz*G}Fq4Op1nVIh;R0tQpVF47Vg=oKrhxfbLk(`Hs^rJ67vfxL&nX_jZQDViYaP~#*{ zSa_MSWMEEMIMfoPd1&3XAY%}oM+Sv7d@Cs+%?vm}-bH#Ombg;`M9Oi{W*~JOpOh3_ zS$SRYNO6Arp`kZn-=g@Rch2>cH-3(9bPwyq=nZ1Hw0lJ(5+Uqy2!}L%CMz2 zmlUnT7_+=ZRg0Ci;YsO)+SQJ7DHMOJ}Lm02y6}Q)K-fRE~ZRUEM({@=Rjl;%#p@xFQ z=PqYv^Hn4zzbvM@9&?&X_u+Gg^P`ij*Ts9ZcYuPHz2-` zl$=9HihUsBan1(nD)!nCSpe#9hm&RQ0-%~o0j({eu8ys=9|+zI?fiDDd?r9&Qj2H@ zYP4wO94+X3v1>aIzUU{g)1YqvJX*EUKmyR|sDY5Dd~>{HmY4z?AoLaGV`wX4Z3D7R8Hj5M+1c+)``UJ%?k}ZubkcLF!{g%P^MJheyX17mn1u$k z!hjb^NlJ!cU}F~?E!GcPo@0wD3Oww6FafHORHqN*V_VFOwnSye!$%N{8-^@{GBfAq z21m!n8ZA-UzutBOUsg(O)ED1<&~L4scTPu&-T?4-%}6) z%k(=lnT`5YECow(^6RbUCW{n5fVPqHcQ=%Foe+*F0#4I~8k4r$)vp{w2Irf7SI0|p zl-1=l?Cgo4{UUkw_V%{%Kv3#yY(m0kFE74{i3znR@SeAbh&^LtF+iNxWPhfuZ0kN+ zsCBR}(g|aJ#ApEG+^ryUv7ksp1&{~q<7Ldxu)21tC8v zjb9)qE5mtQv?*5`26W{JP9w{Cxg?FzXGvM@0bbClA`}374#c(PY2ij#adB}JVm{z< zFNz8|DQVY7hz01!c$rCPC_E7H)QY`F^uBZ0t?KM-iANJ|0I|NHJP&jkb@%iv0O9cZ zaIVq)NgI%ukv@=z)cW@;^fw^vj(_f(0=AN!-JwYIna&ipqS)i>6G{M-U 
z#Jc=Hqw294MrEB8(%8req)0Q+q@hM5@!u9D>-nm0xw&K@`@my1d=2EZ_Q%nr!^w~O zQ$J=e-=m4f1d;1^Q Vn%Mr>2sG9~Ns7vSt`Pp>|37;>