From 233797223443d8c3198cb4594404db12146895cb Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 5 Jan 2021 09:58:12 +0000 Subject: [PATCH 01/20] Add example Word narrative files --- .../track_files/word/Narrative Example.docx | Bin 0 -> 22342 bytes .../track_files/word/test_narrative.docx | Bin 0 -> 12736 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/sample_data/track_files/word/Narrative Example.docx create mode 100644 tests/sample_data/track_files/word/test_narrative.docx diff --git a/tests/sample_data/track_files/word/Narrative Example.docx b/tests/sample_data/track_files/word/Narrative Example.docx new file mode 100644 index 0000000000000000000000000000000000000000..38b3850ea5c5d74a61a27f98c77bcd83f5e78d01 GIT binary patch literal 22342 zcmeFZbDJg0w&-29ZQFL2ZJS-TZFkwWZQHKua+htZ%X+KV+H2o)p0nTk1?91SUYgNRJGJsGLTFqSznrzTf zj)V)nI1roK{QOvOzObC$D`~m^y|=hKR6Idj_nVdykKm8hj&vPmXjQ)iUGtW*m74p< zTWD$`sFI)?IE&`M(P~wEKANQtxbFgH=$5~TT=Y0&>OrZ$A8^~Dn0wU%ov>QDk?#jC z1AKO>Ydf6a1`Hq}gKN?D2oN`eMP!)1@Nf`vP*;aKN|c-ApG77EjZw%c_@JQo4nl4h zDAlYdd77dUdfqOd()p}XuPa&vsLHR0`SAsj;=hf74<)5&Nx7C5N8!!h$rO-AJ`Mf? zr6&{}Z)07=s1xj<#CO1+UE}*@7o~p{aJ4&R#Z7F!8|a61+xDZC8NpNv-{|{xmZX4Q z_|CFz)P>BvUm-r79y0bsP!(LQJ1c*oU>4D>FLq*E|HwDTU;W7IpF9~6zM~DISXZ;| zJYpAOrok={C@rPyWd~tEc?~5p}%^*o`bQKBR$=p^Z)Dd|BJi%e|h!D_&!NsMud3%*WKbx}0nZ!Pt8__q= zSk&ieJ${|hbX)mGciD@Qw{8BzpEk3#x45lalkbi{U#93xiBS@ANRT}K5;5Y;tx$1j zP(bg2F^0rrl9tQ?+yYb0**oHy9XK6w-l{9O^InRfLuqd8vtiF?i`(bNrSm=kGqIg! zp3yjMMDSzkx0jk>$t1udYt@vsw=T+Pntcffoyksd4af00%X&V+)7!$OgNEJAnW+Q8 z`Gcd~odUb-^J!`Y$t_WiXV59|ulN3+og}79?Zo~oJSub7}5XTQ;cj4 zovptj<)0AwPp<*|ij!Zp|92miNqw>d3@{;A;+;ZC+%nwyz>PNO{Hv%T+v!EJTC*f7 zyMUga{p0O6)SS86R4WQTzR!ED^Uo=j1@H{bb5z9UnXwQkOYTMCk)1U7^fux~ab=l{ zngXJ1`eR-$K5X6r>3M5L6XkVrN(yYmq~9II$__}yO$;~u9H<4c$7X_yb7A+cDgn48 z;~81KVVSsRBGKGMCy9DUW*z2R$Hi9bcSW^m84fiSX@!Sc$`gcCEbNs6y{l@y5P&!| zOGcytQ{xhfmiv&nChhORM|v@}Qaz`ZkWk=ok~I09;2t%6n0bTFVj*&BJ$UnuNxcd7 zVhIE-m9cd6oYCitybuy%5pr*dLv)5iw>O9NZ9gi=CG|N|yCAJIUrx6|DnAW9dVH`w z&q{1GgP8fvf~A_v4Xy-IZrfsu2`Oiys`2{R3fF43!ee2)tQ98M*(2M@5l0#)=AZ;T z%tZ;!R~g&H%=b2;W&-j1&awZK$^N*Tns0&!03bdA06_e*#J^1TzZ&bgj)LPZ2a-=u z`71nCwu>Tj7-0~C#Z4w)LZJ&91p#}DoLV^Ic0`?aGGxa4{Sp#eT_2t}88(fWJ7Kf2 zMwLxdw~ez@kI%c~POjK4b4L4=;Q9%`Ly)cHWupEh)xmFSGkKmfWXfa zUzptneAPYla)a^U&OO)a0?EyCRM(X6mT73cTM!lRjfGHsO8{Zbc<3@q8CbYG&fCGU z&T@`neM4swMXU8rf^_K8T@R7Lv7~j_TDp#WJgu!~@b5|`_((WJ&&R3NT)SINvV~GN zZ*HL6@V(s*E|gQ|Fca>jpiMX6wR5wF%sPhaT>!+~P&3$Y=7RRw3z7$eZSUKw-IEKG zz=f_&!4{h2QZ_@Vra#(NZ8kz>bhME>bB#4+VD350&qYwrkHcqiJPU)+RQFsC74f&Y zXNZpN2TAJ-16|rn639Q!2wOLAC!%s74634FocE{&hSU9oc^$p=g% z=EsjT7xzJzTDXkDQ)GK{v|Z@vH8+mO2vpU*?6&W60Ja*g(aYTiWgGTm&3m_QDdhF4 zlj)^QUusOJMbbiPF`(svl5x9;!n8uPouR)k#0Zjp z(OUt+t_=wrF6+r_&S;fYXN-JU+)Y{O$@0-Wf-VHIEX));L`;rvH?D}2`wKETKH0m(GZd2R1klWwt-P;X%g5K*E z;YY|0NbSV8Eh&D{Jy_0Y}1wWlejI-;b;}@UZ)MSAV+fJ1OLpg z?~#{jhW`5Qq+z`t3@SGjYg^i<8zq6U$LIu9f^4$#5Fe7d2WAz7i-Kl_6M*VWDFE8f zj3>G-p7&m}X|BDqd_ZtVH^*bnkypAF2?lrvKsEXS{>m(d(A0~*#eF~m(L{n|Q_*az zdh8~A|1N)%-@C2PyWN^T7zDHGyZB73`uejZSy&hXm@`W@vZJ~#E&`;$sy*zdojt*0 zb<(0m1nbG~iX}CEc!l!gm_D`{gc0p@bwEs< zB55R&1zpXJ4cf@2U@`Im5{gO`9f}j$d@or~2pl7v!NAXAvty9)8+cMn`w(s@lVJ-u zk|kDcy1L_q($Nx4pP6y{@C*Dt8)$Mci`7!LtD{R3&xit2P_>in{xXGRt zEw~hwnWVeYxU6|yGu@L@WsK-VocZ&NxJQMhBitmE1rCqa^Q|c~2XW7G7YUzR3)nGc z!sp0dAYf(wVi>~G#H0#G!m|rxNI32?Z8I&xqgW?%?U@V?=%=wTpgoWWM1S$D`vrHr z?QYDxLeJ_PX*_4@y{~bTUbwxy?01@N>2LuJX1XSal}<&lqVc-T1k7&q`H7OMhj@}b z<@%ITpxd&{qB2LA(T6)q+qT}#U( zSuswE#6Uq#3YUkb1cl%Wb}wpl_YAD0lL`BMvY7x|tB0G1*MKRy=9xy#kI8}ZN6Tcs zHkOIGhc2iD+hL>-0UO9t7%3ai(FjnHVIZ19hU-g@mWz+tP|+4c78Y=%Te9Nc^D;N| z6cJRE94uHE16$o`1vEtawS&y+^T1VhhQF(2#$3(XKU}!+G6sllA>@@u3qYJ1&K1$l zCX!UE9A`apF0_?@vs>NZUE1~UMQ}f`NMUyp7Z^qDh0YBSH&kB7%gY7r@3DbQq*7B~~PZ8msQ;6Ys63x8$yJv5? z@|}ZMD$(JLY)wj$aS4yGi#I|=42x@gGki_!ggKVW{N(sm_aid3o9glR^32ISNXdj< zV~`q11`~6~@8%`E^x@-XS6X&um?aYVPaB?AY;u!Gr=zZ4T=dv#xIP>w8QHewZ@T2woNJ4IkKOq1Xja=htuy-hMe&_eVu7U^EA0&OB3Tr>zopWF$(lm-B&~ zF04V)pxvz;{F+75lg6>4ufSa!bB;OsQ_^Cnhq0H_yq%bip~j zA(jYilS^r6TW)ppH*F5aWoZgyHu%xkx9-*t{T{Ow2$tjqS9!>r=SNg3 z**SF$GNs*408@2{(iS8g3rrpzv_`bX>u@gSmO8_((X6*vsWyH9Y}J#)7b=ArG)|xl zwzGT)+#;rGXL4_k=Jk_WQ;>Bq_lMI=-H#0IQ+(zy2~eA`i~fjhBQ<}mv(~2dSDtxd z#zF@1wc%Fpz+v`7M-Zin^M_Efo=0#dF>6D8vconf2(MYjCXXr`3Wzt+?3omd!%R{t zSAhe%PR~n0htL=dpr2KjYaJd*67fLyoazV9xw*O*(zU!G;&~puYF7{p4;E5Q%mO|@ z%&l#$qW=DyjD-E~@OZ(f7YYn)qCJEF57zt-u|kG0!>$0gp`D}b^T(+JrTc*TdL8ZM zS$fHf)w@@mXv7axwfMc^Q9!R(a1Y4gc?jx!u3}-Gyl)cj*EjhD0y5))H9Sutwua=U z>YsZ>x<|hvP!0MfSzr=CpEm1OGD8Q{^?87e>H|63UCs#zx*_}^iLDTb_XUD}$s2Jd zBC-^9gGQY31c(k!T%*&p|K35=R-@9y5EWAEq`-1t_O;u_%p}EIggn@;0NLQfMdDMc zXk5F$Z7W8D-<)lZF%n-v9?!OWR%T|`Her{_C>!}cHRqB#)hFhDb&q3yG}J&|?k<(4 z`T&AnKT4g9XI8E#tQ;=hF!&W>Ge{7Uh=?Xa6zXT&^IEgDh0 z>Q@vD_@OWeVH|frm;1>*k07>L*qlOxR)nETy=s9lAigzLD+w?Z+OgXw~?@MLkbbJoWx_g{rVPBLmH-C0zTfa%#_)S-u{HkF6(q4B(W7SQO$ z2%}oe^KB~NP}Q-g3o_|=fv(Al9)WAyrR+^PFxiiypL7DhVm@S5Ho)|8V(A|N?s&Gt zw63AxR7fo~mtNH?{f_eE3!iddC4SP5`{tj8ir_qpvtG4c6%H&_$VVqTYxXk%Q;o#M z@#1jkI&b?dCi}D0(~&db>OvQsS+pmg%|#?e-p)j(*S;%vCeDrRjt~~*mNW0Ds<(Mr z?YDO{@MaCj>o&&G1}rXTK_0mmP*{GzxBQ-=DUh6QJfMc@ns$K3v(WwE5N|jJS8NmV z9Een0!x=>VX4pWK8IIg1gzkYwoDSyWLQ|feoAxJP`f9fXhG%u(L-I^ZcqdSM_>e(R zMDhf{>^L+MK0b&?8Y`-4g$r2%$L?5nE1nYUOR?~l=2Bow&AhVJFZ4OPMUap>SSjy2 z^csZNTXNvvva?k6B5&0F!#;2=xvW)dQTS4T^F5o77)EV#pzZ$8h)fIbGzV4fR@QFC zM~1j%u5fV~M|}k8*SXoC&KbH>Be1LZrL= ztt|BH=)aZ9);{-Urvgu4e?z}{t$v%OZfRi0Ume0(GewHB`5E-x!N7E+j3)-9tVt<1 za5JvFL*S5)v>k^)Y`o6{U38+f@l2(6{EKcql`NY$z^0tCtc{4OMP(tOiZ9s`)8ac9Klo>Kozy;u=v0E5K=^-=2^Rge|H* zUIxOPj^pQ79^>=#Ty^Tt5Zot;LJV%^y*4UL8H>ONF4z1TNMpAuZnaRd9@fp{+zCs1 z&##pqF-ep2=4d$D@Q9l$KQt@HYM8O4W_tQ&;2KtRx>)y`23r?aj=tJ`P8z$nsezF| z63jSq8`;)q%pQhmP=Z16np{ahAHLRrSxrJ}{QLI-v0A9mT4YVqP9hIlslE>H?yR=L zgY>q@!_lSTw9)Je2ONts{_9mCfF7{yjNZfsG-VL)NjSl`rrG_(xSmvm=9D;9F^8m0 z3n~ul0|tGqkE(w^l7bbM5fZGO6vxcRrfKI}A_jY}#ev>qV)qMUVM1&-2D^_ttHCNZ zm3%xa6>Ez%c89WPY7iwImrO>}nadEsMqVZE#swIYQ?WZY&x*YvH@}?)pgzN$yxU zQ*6&yI_ZC=rKCPUOc6i;0J32L01*Beevms_EG7a-ewYnSTl;mTbgn zBI!&02#KU9q*a85NVRa;lHT=OBx$x1TJ`N&f`bI?hncKQ4h=_IeK z8`4Hde~^Ct#RQuP#zJyZZtA1Mv(8aWNFphOGLP~&2~RUgzj@W;{d3)U6!uy+IHNAt zw7SU1rhqVjRW`O&M!dHyH?9-bY%W(p7-)Tz0v=mtqWL%bu(@BbpFD7)O&o)(9b^{_ zt$+aFdLvCq96@|CMoA>a0OfTQgoI zk4k9CEt%GMtKstpiB83Asc_zXiojKLe`XgTHDb|JZc_W|K#NjuY+T~iU!bT82&AD> z6ga~)smdAjubk!&S&59P-Fld%VzTU&li_iCLfA#Nq{*5Xw z7_G1*wxd-k56m!uiRIVjam_tyQ&Ryy+hOckpgc`R@K%9MAv~owlAd%G*3_3bl2N72X6~CRpaQklX?H?;u2TmS{MfQ0{>}Ed)Dg0<#k(VRU z4Bu#S!x_Y|51ybfvTd<0VX0y_Z%O|JFP+)p5(Q}mv8^C%!TqZYmb{{gMyB1hsycjO z8JXN10)R1E`E;CtNPE;YU$Q4~uMOgjKZsIu=Y~wai<4Sp~z#KItvAc|v zC*_CK58+(>W~j7$kbD`=B#v042)$l$TIebU@W6Vr2k(h17-#cBE`IGkd4TRnL;q*A zjLizZ*R>hQQ%-B;*O2)V%QFx%dv(azDeCB-9+ zvjzRMtXrT8TxQ7_vi&nFCbDSp=0MmpC-%6I^aJ1b)!w1dxzb_AKZt4h;A^bHAa+OyZ(9E zI3kn`k1wnh-RZ>^{6$L!?f10#nvbobZFdJzaE?f!fvk$8E8beizchDI-x$i!Wait! z(h3D{LC>o_UWOY#k1aoncDffo8+brexm8Z7l6JE(hoI3wU^s&TOJ-%$wBb({T^2yy zhWaTN`Kp0`j+7SqIHUe7lPgHrxb(E)=T3K?m{u&-dl3`f|s71KD4m`>zkFL z%jF~afWO)O8nALy))Ij$2kR=A7xDvG884RtTmAfLjZ1K$re-i5q)V`pOv6$^s^%j{ z-`ok*uh|yGAhyLD4(4OUC20%Ai@26*xf>ic!WV2Nuu z6OJ{#dnNNr9RSmYT>~Llu{a~6Ce_us5-1G+J%wX&7H-*+QbHe7{aJe<+WS=^h%h3t zK%*Q}A_`&yYtTNc#!)hJwg8eJ9v{0xzh8HdB@GnO9y$A;F2SZ#?syxwNvo*wRS#WH z%rcvfd#nidV2o!DErLOkmsi@oo4noPXt+|AT?a zMXNfbe;Jta7mWNnJek;jnb+UpsZeE2c8>wY>nl7F-f~YB<5>nu9^UHFQI)W10$=PWNg|@u)T%(*VmFfEWc&8>N3!T)vlz*vaW=Ey zyX5I)OaMdaR$_7x7I0Oudg5}f8W)|M;a+OvpaMdqS9IDj2OO$|2_5yY3 zj2yNz7!+!=)fkkC9={&LvDr9^b)&ehPL^}EwhRzZd^|cWhEY$9HIjI`m{uNqBp%{Q z;v~t69hDXB^im^0pJFPj(%L%j1eklJmc%DZr(v_Af!9n;#8IM1j=6TuJ@o%fkv@^eGo|6)4#)HMj+MTK00sqUEJ! zwB52jhm>CMM9{#O(ko47d?Jok>dPf{g?;;Y8dX7yP8xnhlFK&Suwgq!N_bx6U{+lt zN-Ws|ca{@5g>M!o4bBDjeCgSbHT3!{{uA^aQVIyd31%!29eJ2jH^sF!CLGw=ayD2G zS~~1)E%czlr%33O?QwI8QC{qZH*=l5YvzjbmAEzW+q{tj3gF-10Nwk12?_+fK!Dgm zj?K7-3bqjnRxGT!M~8C#pR{(Qjriqf)-_yvGtCWsnOc6c;_9m;2;zcbfbB7;`_dM9 zIXs?r4Xp|?1~$G%i|E6Fv|z;(B`5tx_uGE!IL4~L;@{8=O8D^Ns6cMn8+qQqzk0B_ zaeHTQjXH1M$jCpFScn0aiF-{W5j6mkNh%IGjnA)Z(bKJ2;8ver3vD?(bQX?k8ty13 z?c=k5L;p{9EGo|q$^2!mXVX*j;`f6{@@%pedE>Y|(;&re_=$F@bBGULHQ3T0Pnnsl zc?0v0r(~sUe310KB4&uG zj$Dl72FLa(v5>-D9gy(GOfRei#xwB3m7-FPu^$)5uJ*Idt18@tZPF16_O6};z`!~k zN}ved0_5X4F=)mtuO#wR3Di|7CFJ+BGF~ACzVJP2hKqezR9QyOGZk+pm71a&OR_y4 zN>1Tdr*1(!++Gu#?TwKUJCW%V`)-*~Nd9{NpzUxhC1uI+YO}YR*_*{kY#U*;@`8YM zIwR^!O!gFj42kgV3$8jh8w?%Rz3xXUIh_=(x8Zya1>YOnnYyTmc|%=FvOOyqpf>wp z)frxIqp>QEhMGq?}Blj@Zp0?&Cdsrg|31{aC z%#ox+oQkV=1a7-!{zNyg|HoCXeLxm5P~W}67<9Jw-7R*))4B^(%!MXBe`k>;r+uNv zUgi+OY~)Ow?{{pyVKB2)4Ao zzvl8#|1mKqGh=IG`oGS9;rzLVob4JLN)P<9D}mEjD#ry2+&O%?E`wGEd(b8n&pa%7 zSw5eCPQn{|b3Q$< z6A!4?jfawzRjdaBx+~49VNmO#PP6|d7G-1PB^0sPvSPl z=q6T7ncR$g!f!T*-EUwuVeuZlrr&cVkWGX8z`&iOVwF~mfpEY7dhnC$jS1QBzC{>N z>=RzANXuzyw0DNAF^x#otZiu$Q)Z}o#IB;&0`&&gx4>IEyKnFE z5SRw2y$UQ=O=g-B^{v_*Gf(D!u+MgB`BllP?G^Sho;g`1TQYBDT(6!FW`vLJ_|&v& zVd%KhhaEe_esmr6)1;;2bQ^YbSMBKfow{lmFWWXXllcS=nKO^XK-X6}!B5HWw-#f%muVwtjjGtnMh4VA^ zAujGPP?%NiNy&-ryHN)q=5NENND?{v(xpJ(v%_(IPf?nrx#Dng4K{7#0v;4Vxp_NG zBf24kh2^7Wy5=tfUON2b(pJ_q5WAyFbLWugw>bQ+IDBE~r9RXVfjrXFB?0Bk3@;^i zVjPX-DmmJx{A8>xM|JHa zjv?0NC$7eyFn+Ler2PnCnohcLk|2-kWZ*EWdbHNC8sYmyb> zScE&Rg^t~~Y7WoO=iN?<9QtmO>2d}XvyF&l(Y3~noBnD~X-w==N@Dgx^G?$9ZvyyW z_gs#1q(G!n^(Oo%E-YQ?JOh@2!=rIy>}n`_@t1WGe3;*$UaxiNh&J(C3rk7 zyd0@C>K<2do=Uj!GbVErwL)ct64rVU>dfFxI?8oylaD-YH4wisS~g(0s{%C4sm_eg zeXxpf9I0AT|H>Smn9}CB-t8P_P-hXQTqh)a2Y=wB@-pdhIfiu}eavendUDGP6IkX~ z)~k;Kp97tPi_W#L4`fSap|o7Bbe>b4U_XPtbb{_CkE|l)j_8>-KhNiMT;XhUiyQt) zbH2P9P0PaS_Fxp=Kf}Q{iZPNpZFE4d;pGI6(*?Q(LnbYOEPb6NsehaYP6ki#Oe@yl zv`7CRL)$E$^&#w6k_rXtzYH@Sjh&p#ZA||fW;Uv>*{pFOcfyY+Z`dK+-cB zkB8VasBC3A#uroZ5n5^8tk<%Cn3m&~ zyU9^UIZ-ep^lk(!4PB^;M^S+la_txFLt)&jNN#eYgdbytgE(cHNSvaiVhzpM`=6Fn zB}erhz)|k1T+6mm($<6UL=i>0pZ;>?_d$0%SRA+qxwxNnJ<^zq-NZ;}1O4P?(d0)$|>o z=Atr?92SD0Q?aI$H#Nh|lUg94NV`LS=J4?1BJKy+uOTL*m3`cl>k~UH({2nHv6Q13w0W!UbZ*$rdCtBww;hwR zvrs#S#FHET7Si3M;?fqEd9+S9g_Dx61fU+`GTXkV%!;zY$`yF<&W}3O-)_oJ`*wUL z)nmg0^{jJia4k(UrbVe%m%6p<$3!p8k(8+UxGakPP>kBVSWRxq%bw!}>ntjmT_Dnv zSISYndJn6B#X25v!NDSDTBmqn)|DKH@6*s?_5qPu*x@E)J*B>n+nxT>(kV8439O1x z#n{U#YhK9=?HeoWZ5mJgwI%OjO}LHi{!4u+^Uv_g9U}qGo_1h65ULk>39toL%VYY` zpdAY2*kUWK2|A*fZ*m+c+-AnGjvtVRDm+}zqD*A)AfK4Q4f|0PBz>-@oplZ?KX1S_ zU&pqUjMiNMiCReky%K}b`IEYGTj)obt;hq)g89V4zei^qXcz~E0ozs?=!^|#FLPxv ztCG**iK0>MrwDuOo_p>t>gc1#rvv#;5ZAP4IU!~bmDZ6e)OJ@c#3JzJeO|YFDeG%& z^{e|Oq`(j3+t87(%$4Om?{2kK*#1Y~QBNy@NBHG8@@2oaCj5iNj!y1Y#*S*{PG%w| zCdP(Ne>stJZ7tiqW+WfJ-e>rL&4zB{J)MG0q7)PMUN#wa$d}1cCmLZSXA;iM-%G9> ziDsHiRe41sv%5T`@uSZs9L!shxH3L3&z?B$H9QkSag*<}9eGzbBnS}a_bxmB?;N6HmXa{EH+ zGMt=Nd*?@l>|a!d*|uMy+!ELZFE}HES5d@Hag6w_2TJMeSxDT3!)LhUm6wMqSr`Lt z9oJ{Ah|LoOfm|d?r_62Rd5HSkN~1oYA$ zN#MO)2Q_qZObazX?nqXzzLJbW3$2Q@LcIG6Pkw@=)zGBB?%6vZDO%Jz(tjUwbdDh> z&!Lx=IpkgU=uBzJ=D%DwT^W0CLt*1*uE+^#JBhxr3ob9D@tXu@w9>F6Ak5{VWPvJj z9Z|~VbH1^rg-@x1RtU%i0AL>KH@+D3&+sd|R0;Dd@p@JPq4%=a>kVv+QMbFhIi!OY z0n6lmNwtIWeY^+kZF(sc#H zl8TT9mak5zPPinJ?TB#O(g6XpZD~k#SpwoJsm4>NGY}@-C<}&AzD_%0cy=Q&I*1lU zX}UB6kvWSNjcmL$3ljb+fP57=I-SJ^daAgwE_qso6d|Th1z32t`-%{H59qjoLiW|S znEH^(zzcrew(enH;aojzWz{}7G=*;s1jV1y#ehK`!$#E^j_(ssddF6;k zcWUF|`=ir_@V2YwElI37evO04YU}~cRtN*nwZR9U9Hy4)E8i~BB$=4RLFp4NODF}F8tj#@ByPKS}w8kCF44xHyO z<+I)ZPFucvsVKCQ31g9rn+RoSu-5S+-J4oB#FD);3)?N@y&7&pV1^|Enj{lpEOxp` z{vy-~KsaPMp(8us0rGFPi;NDh6T|S2@)d4HUIE`lEIi6BMN;X{a?v?~4^*#-lejs{ z)69+(BBT`uTouk4NqD6rm|qAi$e9W16XbD&3!H(j$Mf9qKuGKDq7kldh3;8J7T0|of*g5Y|{DLkqOin;N6k^UI=Z0F__EYyY9wD~CnUvBxIeePw8AR5$RpP4;35wzmB0=N zy;sEirx{~){B&r4+8XhH(TPJbIIPiM55RtiXFnW;C$|L(ZwUNS7r*_3Y?M6fpEtx+ z9+*=W>plIW1&q|Ju?&D9sc3+H8V7r&uXfk|r4(oKE0@@hFFhVi!lo<9e5r|ZJ`jxQ ztM2|J?5(rCn|In<^8F|h$igE`c?;I4pJ7Bosb7)*)>FJG1I47))>^6)NF|LEe`65% z;vh7kF3+l0{~!{9QRy`$6sRm&=Z+TFu#`{2iN8cl=oVcUy~SW<9lVgm;}mx4G&A_E z9e{lFp?~&}7L<^^IV%BSQ6EHhW+qMOx!vR|nCRtrA7`6}>|CE9juwsW-j=nL@)ItC zy&*D{I-sij&ocGg@iy3ZI3!u?1%KKB+|p5(<1)ntoZICGd5evomI~nabo=`K{(F6W z!GWgxJ4gqM#Q_W-Kd73_A?Q5A7ZAwb91>`H`(-#kOEcS^ekM}5`HU4DtA!Bf1dCrt z5qHxp%+DfF>&q-8P(UHBJ(@lRa9GsK0R#n>MYbo3F7F9Xp(NF0XzlK+j*j~y_jC}J zZ8|pv0o@mdwS%7FM1zUxWKa^{W3Qml7&7@IVsfl7Wz6J9rw>rr!PyF?Foh1BEdG8U zYJ!34kl{wjP597^N?dfT$z%~?&B2h~{~!am!uA4~AEaOOCOtu_*8?q)5z3@Vc8j&+ z=!E5?#$=K-hAIXk8x<=lfH%&xRU2iuTqMD&=FEzmFd!C!8ZJk>+c`%`8BL`@ynC_x z4N(YS#fbxZHW9J0b38>eXqBj$eA?HJ8tEv6VtWso>i~g0JXe&r@{{0Yoo`x@HCH<- zXZk&qA;h=Ieu90I9g*3q0C{9OY6uYLE9d(%lFG%Wa+LIGE}f+|T)^~jy_|OeOlaHM zhR1Rm*8=OWxqoi#&60;HX3h9pj%)QjjeLSz*x?;vqD($zQYpT9L;_*Q8cl0ViNEi2fhw`L)Oj-TTR-~ zEW#@NR!h6eqww{W+9)iNPnjv3IO^mM*ol8tE_A#MMv>5J^)Q;m#gNd3i)cZEiD}`b z!07z~=AyUb=!s6*YA;uOO-spxYo`5B_;X3me*{b4(Ur|GNB{tSDFA>!lUIN3j`$la z|IB=zYtK5YH6wZVFu#MtZ6RyK82;IZUniNi#h&os^zcFgXNx+J&W*S~+4;VoT4U^k zN~zH#Q|5$D^T7MMf6n{*_;5`}9HkxsLWtkpk)(%aw~kkY)?uXTqT)f)$^hpVGZ7mSUKX{d*fQ7G<3fYg!2tnB&Kd)+(sdOao-6c{`%b<6E&??ICp2#Fbz%+Q*fF#oEtDGrJR-CW@A|s2 zpb0|Z-Sq}{FeE!V4M=I#-_%y=?S^7JkT!k|aDN3NSm7GzPc}|8 z50H3Pc;?jlV}6Hd69(PCkNcd)WS|K^7ylFy7h6<$E=zM%96ZMzZr&A_u)?J=ztmNX z+RVeX@q6J{f#j<|ltK2WCql?$2{tPc3cjQWl+tnj10&whaRNmy zQy5R{VKmSwg=WL=N(9jnbZsnZ&!0wHwQoDx_?*#jO(xgEXMU9qPwzbWFKA1-d&(a* zdRg`)n&F^Ob+>POVqLLOr_1#}<9r$GE$A__r^!3tytAzWz6#&*TpR`MG5)^jG@n6t z+a5d$&wEqD)10*aE6za5rO^$$|GIUJ@rB7?G^sm?FzN*<)R4!)4B{a?to^at4vzL2eb z#WT+<;G><@E6+$8FHx8IXuT`GZt_=0>$tzJM=GbO9jpLzA7_~5 zGfDi{)it0u7`g(;R+Rki!V0=4o-@C_OQ>j{QNh~oc_P?F6fVsd2mo8|`msh@;`(Ju zJTiY$Ewubx7Uzxyt#^8lD`U?pNk;aQ!zh%C^}Wn>y%I@N=ZFb+Na#n8=Y%&iQ<6GV zm>qYUyZ_lYG60nWJW~sG48T%}!UdqQJFt1Tz#0d~)XBl6&pGd)GsJK{I?3Z`a`=bY z+{!b8J|^P39@Nd2Wd1Mx+xCbSI@`FE@T6j(AlUj^R&rcDc3)<4)I z4gU{o{t^9eWc#;h_vk+~2&O9Y#d(n_eSZNeJ*Y$7=oiM){JZ5?vYYP&|KcjvzcALJ zI23D?#1<%)nH}c~Gvoe+nN5FVCRpCDeVIQj2wd?+ugQOS)j%Kai#QSJ+yN0x;vzEM zV%htM{%Gg_D!O!OXy@MeR_m9c6oUK~bLpl90kABMY(7vjn9reQb{_`umA9=S7ea0Q z4B#vho-#6h(HqjtA#!mt0HIKrF%5|<8PDEd$TqJrQaJLCuhu8_?fb9QP(tUYL)giq zG+=iQfKt&r{^?zJXd(MG3w}X~Ktvu^P7Sz~9fVWZ$n~T^M~jB^!oXmTX7$bCp0(uC zYi@$wVK(JPz{=80r8@p3N9=oUagL^0kY*UyT%y;!63HotQNt~v6KC)m5$98#(!QYR zKgjlz8Oko~!9q&_!&f-zg#{#q*6`aPWY2mbbuGfM-A#?jYFdS@bVR_B8c(#ZDaWnG zXqC75cOTw$X%@U<8;2TtP2qC#23e4`-Tp zYFAsQs(?Yui678uVb3pOLr9Sg{1o8+} zJQ5r@+kt7xkZ?s*IIXk+g`DWEWczNtyv@qR1hdvdCn!hh(jr1o!zQ%5hc9R={R3@> zcE8o-gpvkkYw?`q(L)Dj<&J zw_)++ggz4a`-0d_;KWlc9{FwVv4@R!btsKWskA5Ok)Hg}xRKDbxl|t1 zi?l7ryW2aAQBw~a#fCbyB6FTM@{d`wPn8>( zC!Pon*<1z1w(Xg`iRG}B=(qPD3m^EV+I8PQKK}Dmf)gL7NB4p$@;--Pt8P>Yp z@$!HsM@@6i2>JX`xWU(7d6~MR#4@AIN)Z|DfiDKkcXYLgh7u9gl|`BF zPtJ7SaghIQ$Et|tW*@mCUzm*{tT5}>IT=hcwIs%bQ7%qpB2x^PhHDe?sk4r)(}y+x zqd~clEdl-u*ury~nK|c1z4xPTT6QRI&{0` z{fO@d^nY58L#FL^yYuyODqmKB0YCxpf2`oo?=SCQYv=fv5fJ2)08r!-{G*2d-_Ok0 zK8wFLst0xn&1^K`wwP1cOEy=|oP8|=Te3$IvI1Yj#5KI)4b5ublif{-?Z#Ib$w;e* zKL~V52HrpzO-!nuciF!Fa1kUmB7|BC8`Bu-w#-3L%$EX4(BAbmW8?5J1 z)DOsaKbo0p#Cif-9eSl!7z^6mD1 zUuEj8uKbTL1^v%upH3~Q?=oMjnXbS7>gT_#X0mZoHu!F3{AcvDt}+q5%7C(=cFHHb z32)zD@U?Hw`Ix|YUPOei7b8?X>SC15XqoA?y2Few?=(NDJ*`I|{o+lqW{*D&kV@T& zQK%5$=p~zj{3tt9PdkDqmD##A{u>G_15|3EMgKu#C~tI#sSXN;)PB&uWIdiHLm=G? zybr(Zozz1zVRsc17W$GK;ahm)fU5U~y+pl1#9NTTN1czPH>7V2Tq4;2_|{x70<2 zB1$qqTsS1TBE;Dvv>8~zrCoP2Md1QVN3>4epc%xI4cmVH9u=Raf=Az4w1Sn5JhkED zHFiyjVqy}xH6Gh>&QDte zV?=FWXA2Oscht%5_4dYZu*PzK#!r~zmS+Th^!?)TbuFjQ;@Pt4t*gCFgVy)|+ndm( za*yAx-}~;xJ73;~YChig6{gC*l$6%x3OjnPS;q6H>Fgh+)lN&K>N-Tt|HS?|mHTX? z+eInMC4ze-Yp#Ek;_YZKuzeyXE_F{^&&99!w6OQ(TRP?OcK2)lev{8xw7O?q%tw}( zp#8-@|3STK%*1DURj4f*m@J!t2~ZGNf&&jE0B2bJhtBZ@f zO1f}R{md$cg0_WhoTnmvl$z?d@bzq|=i`}cyi{b}WxLB4-&xMyywP$cf0)mqZMloh zWOFM1v+Q3#Kdx=X!hpZ)Bo-Jw*GXnuk?oUdt-bQp$Hb zQ2A}vi>9p`E&r{3d7S5s>;I^yIRQTI^^XM#_J~zH&`nQ15d2(sZ~cm}6+Vwt_&7Ny zZ&BJVIWx8J&&AujXS`SLVXZXzsc*=6a?h*6m#QCGG}w<%zrXAcaD5$?6qza!u>=_4 z|A1H+qc$O_B3X0Zm#f)8pzUGp!g3DFhKYJH9b8U#uQ@GPoj1v1mP4J2q+|Ylg{A5* zihcl(d2+jElN0EuB%`Cz()l=X;nMg2ZWeD|ET`72_M>oKZ_~x-#|v*vUizek|Dn;j zX_Lf{OiA?C_9{!SU6tx>`Y6hH+7veRAi=N?m9w2om_AF2i=F#myW(u4dztp>E>GpP zoZcdR-T#^jI=zX=R%-2vmp<| zgYtv_3%=C8p35^adHcc>N2V+K)H_^1oqKedrssC6`n`9THT~GMeVM1-B%kNanK7az2GDyWRU{Ju^F8#i}{gYh{SLal8FBP&#P zJ9E6evR<-AQ1iq!*Pj2Q=ZH^aTEWq&F#lp$M8`h|rm#lH@9+ISC4QR!@&(IPmrlKT zc<-81F>+$(oKL>6zdpYfYkhi1n_b}rQ06f+Xq7usk(O9c0IE$#OEh4@0)jcgzMymC z_JYohtH~EhkG##=)1`Ry!r@)49k=J1&g^TwIZ0uowbLvIk9c_ z^mS*;Bo-Bx%@m)UId9?Bbw*{o&&Z$37r$O9=6Ta=>$yip_mj`$KRM37f$vH@*qUS%oncg*HP4qKs(Vk65#FlNdvv_dLookcH6(jTOzkV)OqH zpO(KBTb+FVe``s2|6hCU?w={A&Ohwxv)tG6Fnv-zJa;fM=>kg=>_v@sC)6RdGBErCW;vwu>d-Z!p4x@1Q69J>1zgtQIKvCwB=p0S5N2HhP9lI!LOxmv zT|4?dO@#Jsl2GkPyEf5vqwmu~=>8%L)s4Js3tc<p+b_UQURv9eoWELi=q25UYo&gK*6r$@#ZOJ2aGcaU$F)(1X>oK&WG`oduH8Kdq0|1E1gyjGL literal 0 HcmV?d00001 diff --git a/tests/sample_data/track_files/word/test_narrative.docx b/tests/sample_data/track_files/word/test_narrative.docx new file mode 100644 index 0000000000000000000000000000000000000000..14e7a0a3bccba489fcb6cd41c7fd14b1500f2f8c GIT binary patch literal 12736 zcmeHt1$P`t(rwF<#mp>=nPo9Elf}%;V6kM2nVFfHC0k@MTg=SNOs{8lXJ^Jc=ly|i z>zt12%FG*;nH7<6GvuW}!B7F<07w7;KnPgNpS02d0RTR{2LMn2knc2wY;By3ZJczK z-0X}Uwdq`~tv=*}y`#tmyaSg1-}QgE2kPU8ZGJEyirghUBE&Q)8|>$nQoRe~O{7&k zfWY*4r}7fp|NXTc<(+~uNDQFCXV@E$h%cbX~`=P1b10tysqEtxSBbf31pV67u z@7h*DlU7OXDCHU;aNu>akzwL$=RSJbh!bHNEjUi8nI=g4>(!%%9C{v1XT^Ia$wxaZ z`8nd9M`C00CCjTuLL560II$>D5A+cgy6BgLRe36DxwTa1)5tF2u3sw`kk&sg(hAdq z#EdPc=2ocXtYp4XKomxReCHO~TEau1j9JG9aP7(Zqe=5siCzqUN6F7!4oSvt<`%VF zfr>E$&2hUO-<@Af56vI5zXmbS+v4k6t+J{ZGhYWV9!&XYr-{#+jzip7XY=C?>@Gc7 z--7}GZ*QOg`MQv_p9L^(cfNK%)VX|FtYm+Z})ox(UXYay=b0a3~$(jJJ{I)>#9ZNgWR#7r9< z^<%YGyGQ9Tu9+#qg7St2+lqmnG(ElvYXz3zrBPP(?h6>wtuZ~vGzs#CZIb_OHRdvF1eATG8JM)d!35+hqfXKUc> z_1m=dhr@sX=Psb0|Jzr2>@d(^fMa*St6#DgEti+P;I@?{;nM62Km?z!mJxv`Q ztIVGkvQpDGUxjno6@M}YHN+cn;<6a?W=qjWMEUKq^r?Vb>o|uwB<#5`$zsW8GW1re zAiCUq47Py1Cup=yU0wHMvjyek{Y1!T_ZB{Kz%HqVr)l&)jzLN|dQU_}D)c8om?2Y< zJWXk5UT;aKer8#Rc5UVq-D@L3-&u9CW3&2X&P613TN6rD);KpKC7X zb}vlwzxWGnb_FA|sK8SwwDUYa^m|!&Os^OUvm-*r&YK@cCVaV?SDER7Lkt|Gh1V8E zY29j~>!n&b5yU8=G|sdzYfW6~4R588=<)m{zN(*#%6^xJHEPI!)`vW=x&mN|Z5+K!C}t8 zwInniwTI~OkJnu@`hukvFMCQ_HIz5UICTYFrg7y!A|x2e*bm`RT(~X;Azj)2-`~bU zTyPV{3u;#FzNJ&k)p5t8Rfjwiy|ND_yk!{PU;}M`QmsNi*^U0s7Il`YPW`uv6ctC}} z(LjBN^qU9)Kl-~J0bJe2X|4fFSVc`-pYJnj6QNOxuPj1b%~8#`Dcl;n^5ZlqR`-53 z<}Qoca!u~J2_e%SI^|tuSB7n#CtnYYKnsU^f4Hq&ce=OO4dJ_La9W-pm#2Fu8yF4O zvcA^hYIBjBHhOXri*=NKX!G(6FN0=U6y3*w6PpE3R!uN(jM98d^$njWvj3v_(P!7b z-J339{tJZuyB;<;H~u@VYoyL#Ap{3kH|s)iraX^o_Y~=9CP~lruI`=pw>Xg8aVBua zhLWy!ZX14eOHJ*X?Q(_mdauxlWhr&7)$t%|T(JV$sLBwak;bQp+#SKfcj@YIz#;a}Dyc8u5Te|;xuC~No-zA#=0 z^YTS?g?1->vy>obWzg(Pc2JMy_Rj&q#=Q6}rx%{_Rt??^DdG$A9$DbJ@;T+$G^$lj z8voELWz?3?B|vg&NOWeh_n4J$vt*{D6#w-f7fy}Wrpc4w0Knh}008X|7k4r! z0!xn;f^FGB6oh5$*^VVJtd>Y8h&jZUV5Yt4iPH9i`sS<5*IxSCTH4ym;<&N-lFTWV z4D%y4Qjcm<35EWxnFTMEu71zAo!?2rn= z-hL!vS(Dl=o%a#8BMwSA#mQ%bQDVWke;@|+^7=|qyY4<;zl8^X@B{oiONS}qYSg_JGR(OK zDo!U)pIMasQD$XH!=*|xd5)Jo@A#Gtp_7p&*ZldV9( zx#pwJftf|V;*Xh}hPY-z14Ewx4O`IxNa`7kTUTwnZl5!UKK|WRh=KLH#IHTCV~rgA z(`p}Jl0m_g18G-$gEwBKh4IK~S(}DrMugH@a=t5E_mH?*zOPq!x+kc*{R+J`gCIvc zKULy#+!?3DDKcnO*G~DvTID0imf__A#z!v}IU9p!F`A!ssk`Nc4{LKQB6|YuYBxpr-f8gcPiSoZDB?vCJZRUF|4!|+K)ik&6~s`?rD66GZH_MB*pmJg zhGWt?i231t%!IeUNwu_c6=Gpk)Rn60 z+}ZwcBffm6D2ixFE11(tKWty^aCnJ)B^(f;;%lcS(b4+ET*o5m^ES`d} zDU!x2#e-4i4`&Y<@+O2)Cr4EipO%zvd5(jgrqiW>X<<5Zbgd7Ev-NvXQ7(gfYQ}DB zdI^Q6qgr!1JFU5zo7)(Iks6K30y2z^^24Gs7DZ}^+liV%CKGt}i%nf7)1@#5%BqK} zvtibsT@i>n5M~b3C38xCRG~-u&QK4spzKd5e{P}A;2_7l6X-p27Z^p5Oi1LTaC5Fw zw0Kx?FFAU;Iqa-bd2Cx0Vh9c2EX}Iq@h5n4s`0d~>Tg-G2CtIO{XQS zMuF%@NJ*W%-2JA-fTUPK>QQfmc-n9zAxKc3JyF()tWd zbXHcI5t>VqN*1rsb|AcR%1}HLY%edtTdG_SlPa0hBZw0dqef!}eM3M`xMnaHf*V#( z^~KV%hWKj^f^S)R;*ce^w=#}zi)!uT1gXmVE6=0qr@*7ifRRRITl;ayc*Xo|RI*~d zRe>R#?ibV0#>Ts!pS?uNpnJP3c~0DUU9)`mQ+)NI*Vc-*vS%j*2DN!uJhC!3R1mD+ z;lTThrr^DYUM(_Mf00kiZHY0k9jpOvYGQ<2;Y(4p^7F1pF=9zzSGt6o7*uJGG9C!3 ztX}7d7rA>xy%BTGD#SNBFL{q>+QdPmKrh9!ryb=2hx{IQX+NUJV6}XA?HFJrh#8nzN_gu*O z5@SIbKOG`rRQ;rVh%c3)z*k_RBAgkMV(zZIm+94os0tyGk{Eh+PL#5JP>$5x>b;$W z4Yra$E2i);rPs0)LGyBdZ(uOYD4*yq0Ugx*y~|fgIBQj_-z&s-$u(o!a4G~vtH zqe-Su%O*_58m!2dTqBL|3Lj7&+MVnq7VZc-7Y?58qKiWka|dA3G9+8E*c4LP7um8* zxuIloM585ptArOn4;(wes2r;xTTJajk~~2?ImmLPgJ|zQF3+4r64YqE6&_XJ_jTnZ zcoQ-)C~HVO-o}_ot@M!MHtT)264yIrB>p*FmBP>H1qh$VxK7O#O$%K+Ul)T1uq9U6 zk*(QCCWB0%k)8X6FVv4>($W{l3ua!yifsj9<%PW$XW_T6)>;qGRxGzpA65IIx(TsR zf$Aw_q7}kZ-hJ&&#B%CiCzE1W?n58fD8lt^)X#QB`k^}l(S3L^<1<8@{x(ri!mDz2h10I@ns@avLdtsl zP8vZ`$IMx~<=r}U>Z(z$Fwt*EmOps@Ak@v@l)SHKL%6{=>T? zF(0=`c;lc{(ty7ALcbPt<7&f#)H*%&e0{82co94^&O{ernK>syDmMp3&{7w2A;*;!YiLfqk}D$D&|{nvXIm>GM=C#gJM}D zlLr1+qPI533ZKfyJ$~-!Ao(17K{)Qqqek9<9aD@G3qYL{EWHW#93jtuKxQ|Rtd^$6 zj3H)cl07>^wlS!r9VutkQ}Bi1mS{;%Lt6NdnylE*7R@|&1HaRo(8wW_wfOO4xa|VA z89i(e3xts`VIFCL zOV={=1<>eBjvC5^q7<6#0fsphcJ0qZSF9kBn&v+dY6EqxTwVHbWnYX`k$a{t4%0i_ z)eZFNhqGdCsl?J)n^O{?1@b2t!Z~MBgY>m3suCBHW2dYRA-+O-%5Ay7i>Te_>lh&? zISFV>WlIiPDC1w|DB$fz|I;yBxn{v&n$?8-3+c?Wwz&&#yi$e}igXP>Qj13d-I`sNxlDH5K2q77|xo z%z`ZMOrIFM9&XzH{Ccu+c_<@sJyd}rTEvcz6y9uNLU~Qq+mTgQhb21psV#rfEOQgi z<=*1rWyzR_KKvG5pzX6`ucDT_?r7dXO)T}HXyGO{8H^#?&AFLsOzy3*b_Qb})u9Mo z{&^Y&p;0JTIFG+Us33#4>AZ&hiIA633s~+%C2GQv`3+(H;=Hjj+}2oEYJ!zpD0EwI z&lXQK@(8vmG@oy;Q|LSBAwy(Q$=CPNk)R|VUnsY1`x#B}h<%5&Bh~QxQ*1a4TDm!8 zM&LUywh-fVf|iO+L_$Y>D(h=llK6dj1ofDq*LUG9(y|!*uSHyR6nN&81iI?vmvh8) z@^nQN@Q|rWBeoc~zbmE~#CCop-7^yGJD1U8=~IXskhH+F)w3ot?|!c=X!BSMx6Ggb zH9o}qDp_GB-1JGdT+iUdA$HS4P~0KQxH+PSvOQp+*UDi^Bz9MO<0>yK-Ih2%q`DI- z%6?e?Yu)Hq7tsn7k4Wy|^rfG|`yrYNU{b}`Q4C9#K4%)=T6vaiIvee)tJ=V=!sHsP z&7fRnI~(~FPUJ-kMkqb7=I6&L@-1y;QIpljaNG#oJfsQ+D(<1>_;OtDlwct{z(b#* z7b?-mGK#-BqVNPOA3(qhfa9%{K<62@J|S8YINjYmoyzuMyybs;n)vsmO`t8?mFAj5`8|OLFJOw}awIk-~Qz4&YOKr^sxcX?q z7ZT+ZdzR3+Q}p1A)9f~})sfli-9R&5N=T|*#B*r}%Y-}AksiRYiwKo>U6}-Ql?+T( zC&~syS@Zndw?G>mQbW0ONH0ubXV}1gUp8ds%HQU^Ziqn-s)jx>)i>=looZPRE~E}I z&EIpw{)o@R3~{2Z88h`TwP+|$J&fF;@MA9B$!^J^(RvQ<;Xd7mc6NgO z>b(@~9SDx^hb3cUT2}h;DDD`a_S$h@22s$n}B2-qY62=~KpsVRh^p zwZdnEy#!M(pXb-Rj+aL6@yoen)34z93vS3d$0Jvs_U%nc0gjo~#~E7ZN3-3%`7y0DZ6jkd?#%(oG{O-;rp-FO)NM1_{tmzwW|x~~sU zGyhzsY57{`bC~s33CbF~LC$L^l5V=2=Pp0fe;D_2rcvmIPBX5K4sBm^MS+aBb?1rY z83#2*Si`+XGfdCi#o3XrTd3Zop4>I+VUolsm34i`r|wzPJ+;Ee*XUBa$buF_#@xWw z!V>7AoHqQ1!=ql#o6xbi-#8nhGMbNC^_ne{0(@8-6nvSOeVE{8_(k)%D6NbVFWV z)8fR1=E?qc#b>8S6)sMXGQ2jQGCcK>H%Bp|hgU96rIBH! zT-u7TdY8kJ+^h94k{hf{`6nAk=-z0K+>3PdhU;Dd|B(80n<@z@O$3XU<|4tVi4bEq zG4Yo4RpcT?X#VyP%Idw0FM_{>x~8Rv3(H64f+ppt=jFsjwn=I14#MEwS`rxujv!q_ z-iv;_qke0q@A3kv5xuiP9xDa5wFV5y(>AfLF9S8j zc4g=SPP49W*n*ilhP6r%xt=L;gINaq^|EzcGRb&6PhyTKawk^IsYjyH2+LAtkU%2| zHuibXh|x5qxgeh~`)lQJRTVLR6TfZ3MOCLy8dXneMYQ zRZ6It8A521YS(spFuVR>u9g#`nbcly!5AM{8R75~+ovD67`ezZ#8Bn-^9U(q zX#Vnxvpe;UVu}ViOf@?&ih{ zGKrfP35Bsl8aXt`cOPGF9w@Rv=S!SR6InpQ+1QwAS8n`NT5oL<<(L^xpGRPTe%TUE zT#4)Duyp9|R;#D)oQ$7-isLkYrNTXWrN^X;cVcTGd)K5f``%;N>Iwc^-}7~K|8b;P zuJ|Ev0Zt+t6^OGGY=cE$%cfIBqLD}rO1r@;*Q8mgI!HTQ_chTj>kL9eBcqY#>H0jR zWN(b0{U|I;Ibsj8y{?@?1Iy*Kkv>`_QJc_p7DI%#1T0D4Yz&?M40bAf$vfF~`4$tc zg%Xpah0@P?NAw!%80U?0WF%4|^|Mb|89D!IQ`o*h%2kzMV*iOId3xYlrE?yJta+cG z?AXLPifRST7Jh%%L0<%By32BK7t}xmW_(EcZgKo}A?%opP0u>Xj%aorS~`;50`%fm zEn5BEpi1;+T!m=)J7Do)5Vm);O2Pav&OMS{5&TeS$sSN>ia!MdAnWA`fjP~9ZYZ?H zEq~|}FMnuCUcms=f29KvXhi=i#xr*->qws?2(3|!_!XT>I6J^%E{qt6uon!t{S86z z*F0z>vm8ON--;qrG29_j zSd~AtpqKKfvkMmg#3xrWJ0n&Szd-t{%70e~8i^w3-Cyf?-D~O`I_aG90`gj1bQSut zQB+wf>Seh)v?7cLCpJ^zP#3rscCIkAmwnaOO{{4P>lbWJlg}f3!d|huy(mQ_PtMz+ zbF;QFl^^AquINnf&tnR`-Y-Pk#h9?w9&M}POLW#uKbe-OH{-UP+>=%)c0IxI2)dYb7b%F;%L=}6lqNN<7i(ueX=E;6JD&yjUR33MNq!8{aY1KE)y2R~C|gFew`8ycUHH&Ii4f|-%& zY{f?Yt_8>NS{X=Tt11+qRi06sY+&QrbzI6PUK2%*92v%N3 z2y%27gHnsKbp}lS{89L!1)J{lKUo&y=MEn`@4sQ^e%FRm@U8?`W?Gzy{^IA9a1hx_Tu@tGj_>fZ!*&urXy_L4Q0u~uGZL~E_T}hD{Mcbm{j%>a7d=79Z{B;UfHKC)%G7@ zqqnQ~c2`VCoStb^G(jN{vH@zX^v{Y*pHu z|A>~~-rB15H}Oj)To#29x!+mDG9*AajfXw`sC66b<4ZsHD0L;<%HoDL`{o{YxU*V& zV?f>AcvdvkF@BSrLY~cLqZ&Fy-l@X>)SYH_^Nz~UN&{Jmey5am30$?v@?#Y0%`X!>HZMw4-;)RS)7pPG?5LaPJuI4-s| zF{_#drcxbOPTj1(#QjPwslx4MzF~8DJ|50vcPPBc+HxoKSha1Vno}h0r4<-6WL>7C z(r7bU^>8}VwlFx|o@qt*+4ht=Xu68LZVQG7d8|R|$03p~e3Vkuq4tWU3BmNY1A_i5 zPiv1k&Xwa+s`8<=2H#Svj=YtYK^PwEMz=h+_@U`di}fHXRU6A3n{gI8Q1LhWI8p5C z^3^1jMizH{Qb`}5-mUw{gpNd*J2^w%;bB{Z+mkpv*7mGK1WKnxYOXX65Ra3-c!H;` zd{1iu=ahg?Eo82`NOcSfIQ(Dcai8zGXb zXE#ez-!sPiB&NG6UcZFP8nsE%3<4K170Vev>Zo0NgZQWYi;nsIjWckoI1HpF5dX@h z>D$@;k4gh>=>C1BD-O#7!>_&*Umz%)PE(@MW>-)v`LmzLrY6=I+akmCH(L2Ec9;s8 zL2d@han-F(j4W0!HjzDbA~w_wdva19znpoLgVB4 znzYUxn9pcJqQAvtt9UsVF0!CUz3IQ0$e93BAr2-qs-JMSCTcOUy1|79nLm?4&Tl*s^ zmnG{>k8L&%=!eozbr*uM0c8e^kz9wJfoe+IKGh6}c5-sa(nWH0jiS!qubL)k?dINO zcr)anIdyTk9O6ChS)Nh;FjbIuv_J;qpF`dM_%wgq{~_>QUh3}({vMe72k^Ih9FQvc zOPKDjz`sW<{uNjU3|IO8#V`J<>DMT>KXn}dnZf^xY5NuaYlO_7@DU&j@jLw2c$r^S z{Cd9lr-}(+zyFKC_zO|=EBx>5`=4L{;1Ux6_%AO0SNN|y=${ITfCS6$Z~q%B`YZld zZtG7x3jV+Gzc5|DYWS71`BQ^8!5{7a4}tS5`0wN8pHKjRm*y|6|M%!AFZCX19=`>F P!vMN~BkTd)Z%_XZd|l}U literal 0 HcmV?d00001 From 170fca7f97656130846aeb562f5a699f22e02919 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Mon, 11 Jan 2021 12:16:04 +0000 Subject: [PATCH 02/20] Started implementing Word importer - done most of datetime parsing so far, plus datetime tests --- .isort.cfg | 2 +- importers/word_narrative_importer.py | 216 ++++++++++++++++++++++++++ tests/test_word_narrative_importer.py | 88 +++++++++++ 3 files changed, 305 insertions(+), 1 deletion(-) create mode 100644 importers/word_narrative_importer.py create mode 100644 tests/test_word_narrative_importer.py diff --git a/.isort.cfg b/.isort.cfg index b896d6387..baed56af5 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -4,4 +4,4 @@ include_trailing_comma = true force_grid_wrap = 0 use_parentheses = true line_length = 100 -known_third_party =alembic,dateutil,geoalchemy2,geopy,halo,iterfzf,pg8000,pint,prompt_toolkit,pyfiglet,pygments,pytest,setuptools,shapely,sqlalchemy,tabulate,testing,tqdm \ No newline at end of file +known_third_party =alembic,dateutil,docx,geoalchemy2,geopy,halo,iterfzf,pg8000,pint,prompt_toolkit,pyfiglet,pygments,pytest,setuptools,shapely,sqlalchemy,tabulate,testing,tqdm \ No newline at end of file diff --git a/importers/word_narrative_importer.py b/importers/word_narrative_importer.py new file mode 100644 index 000000000..355a83add --- /dev/null +++ b/importers/word_narrative_importer.py @@ -0,0 +1,216 @@ +import os +import re +from datetime import datetime +from xml.etree.ElementTree import XML + +from docx import Document + +from pepys_import.core.validators import constants +from pepys_import.file.importer import Importer + +WORD_NAMESPACE = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}" +TEXT = WORD_NAMESPACE + "t" + + +class WordNarrativeImporter(Importer): + def __init__(self): + super().__init__( + name="Word Narrative Format Importer", + validation_level=constants.BASIC_LEVEL, + short_name="Word Narrative Importer", + default_privacy="Public", + datafile_type="Word Narrative", + ) + + self.last_day = None + self.last_month = None + self.last_year = None + + def can_load_this_type(self, suffix): + return suffix.upper() in [".DOCX", ".PDF"] + + def can_load_this_filename(self, filename): + return True + + def can_load_this_header(self, header): + return True + + def can_load_this_file(self, file_contents): + return True + + def _load_this_file(self, data_store, path, file_object, datafile, change_id): + ext = os.path.splitext(path) + if ext.upper() == ".DOCX": + header, entries, error = self.load_docx_file(path) + elif ext.upper() == ".PDF": + header, entries, error = self.load_pdf_file(path) + else: + self.errors.append({self.error_type: f"Unsupported file extension {ext}."}) + return + + if error: + # Stop parsing if there was an error during loading that we can't recover from + return + + self.parse_file(header, entries, data_store, change_id) + + def parse_file(self, header, entries, data_store, change_id): + platform = self.get_cached_platform( + data_store, platform_name=header["platform"], change_id=change_id + ) + print(platform) + + # Loop through each entry in the file + for entry in entries: + parts = entry.trim().split(",") + + correct_length = len(parts) > 5 + has_four_fig_datetime = correct_length and re.match(r"\d{4}", parts[0]) + has_six_fig_datetime = correct_length and re.match(r"\d{6}", parts[0]) + + has_datetime = has_four_fig_datetime or has_six_fig_datetime + + if has_datetime: + # Parse datetime + timestamp, error = self.parse_datetime(parts, four_fig=has_four_fig_datetime) + if error: + continue + + # Process rest of entry + pass + else: + # Append to previous entry + pass + + def parse_datetime(self, parts, four_fig): + day_visible = None + + # Get the parts separated by commas, as they're always there + day_hidden = int(parts[1]) + month = int(parts[2]) + year = int(parts[3]) + + if four_fig: + # It's a four figure time with just HHMM + hour = int(parts[0][0:2]) + mins = int(parts[0][2:4]) + else: + # It's a six figure time with DDHHMM + day_visible = int(parts[0][0:2]) # day in the visible text + hour = int(parts[0][2:4]) + mins = int(parts[0][4:6]) + + # Deal with entries that might need to be pulled back from the next day + # If something that happened at 2345 only gets entered at 0010 then + # the hidden text will have the next day in it, when it should be + # the previous day + if hour == 23: + if day_hidden == day_visible + 1: + day_hidden = day_visible + + if day_hidden != day_visible: + full_text = ",".join(parts) + self.errors.append( + { + self.error_type: f"Day in text doesn't match day in hidden text - possible copy/paste error: '{full_text}'." + } + ) + return None, True + + day = day_visible or day_hidden + + day_decreased = (self.last_day is not None) and (day < self.last_day) + month_increased = (self.last_month is not None) and (month > self.last_month) + year_increased = (self.last_month is not None) and (year > self.last_year) + + # Deal with entries where the day has decreased (ie. gone to the beginning of the next month) + # but the month and/or year hasn't increased + # This suggests that there has been a copy-paste error, mangling the data + if day_decreased and ((not month_increased) or (not year_increased)): + self.errors.append( + {self.error_type: f"Day decreased but month/year didn't increase: {parts[0]}."} + ) + return None, True + else: + # Everything makes sense, so we can update the last_X variables + self.last_day = day_visible + self.last_month = month + self.last_year = year + + if year < 100: + # If a two digit year + if year > 80: + # If it is from 80s onwards then it's 1900s + year = 1900 + year + else: + year = 2000 + year + + if year < 1900 or year > 2100: + self.errors.append({self.error_type: f"Year too big or too small: {year}."}) + return None, True + + try: + timestamp = datetime(year, month, day, hour, mins) + except ValueError: + full_text = ",".join(parts) + self.errors.append({self.error_type: f"Could not parse timestamp {full_text}."}) + return None, True + + return timestamp, False + + def load_docx_file(self, path): + try: + doc = Document(path) + except Exception as e: + self.errors.append( + {self.error_type: f'Invalid docx file at {path}\nError from parsing was "{str(e)}"'} + ) + return None, None, True + + try: + # Get text from the header + # Headers are attached to a document section, so we need to extract the section first + sec = doc.sections[0] + header_text = "" + for para in sec.header.paragraphs: + header_text += "\n" + para.text + + splitted = re.split("[\n\t]+", header_text.strip()) + header = {} + header["privacy"] = splitted[0].strip() + header["vessel"] = splitted[1].strip() + header["exercise"] = splitted[4].strip() + header["fulltext"] = header_text.strip() + except Exception as e: + self.errors.append( + {self.error_type: f'Cannot extract header\nError from parsing was "{str(e)}"'} + ) + return None, None, True + + try: + # Get each paragraph entry, after accepting any tracked changes + entries = [] + for p in doc.paragraphs: + entries.append(self.get_accepted_text(p)) + except Exception as e: + self.errors.append( + {self.error_type: f'Cannot extract paragraphs\nError from parsing was "{str(e)}"'} + ) + return None, None, True + + return header, entries, False + + def get_accepted_text(self, p): + """Return text of a paragraph after accepting all changes. + + This gets the XML content of the paragraph and checks for deletions or insertions. If there + aren't any, then it just returns the text. If there are some, then it parses the XML and + joins the individual text entries.""" + # Taken from https://stackoverflow.com/questions/38247251/how-to-extract-text-inserted-with-track-changes-in-python-docx + xml = p._p.xml + if "w:del" in xml or "w:ins" in xml: + tree = XML(xml) + runs = (node.text for node in tree.iter(TEXT) if node.text) + return "".join(runs) + else: + return p.text diff --git a/tests/test_word_narrative_importer.py b/tests/test_word_narrative_importer.py new file mode 100644 index 000000000..81365900a --- /dev/null +++ b/tests/test_word_narrative_importer.py @@ -0,0 +1,88 @@ +from datetime import datetime + +import pytest + +from importers.word_narrative_importer import WordNarrativeImporter + + +@pytest.mark.parametrize( + "input,timestamp", + [ + pytest.param( + ["041014", "04", "07", "2020"], datetime(2020, 7, 4, 10, 14), id="valid timestamp" + ), + pytest.param(["041014", "4", "7", "2020"], datetime(2020, 7, 4, 10, 14), id="single chars"), + pytest.param( + ["041014", "4", "7", "20"], datetime(2020, 7, 4, 10, 14), id="two digit year 20" + ), + pytest.param( + ["041014", "4", "7", "85"], datetime(1985, 7, 4, 10, 14), id="two digit year 85" + ), + pytest.param( + ["042314", "05", "07", "2020"], + datetime(2020, 7, 4, 23, 14), + id="near midnight mismatch", + ), + ], +) +def test_datetime_parsing_valid_sixfig(input, timestamp): + imp = WordNarrativeImporter() + imp.errors = [] + + output_timestamp, error = imp.parse_datetime(input, four_fig=False) + + assert not error + assert output_timestamp == timestamp + + +@pytest.mark.parametrize( + "input,timestamp", + [ + pytest.param( + ["1014", "04", "07", "2020"], datetime(2020, 7, 4, 10, 14), id="valid timestamp" + ), + pytest.param(["1014", "4", "7", "2020"], datetime(2020, 7, 4, 10, 14), id="single chars"), + pytest.param( + ["1014", "4", "7", "20"], datetime(2020, 7, 4, 10, 14), id="two digit year 20" + ), + pytest.param( + ["1014", "4", "7", "85"], datetime(1985, 7, 4, 10, 14), id="two digit year 85" + ), + ], +) +def test_datetime_parsing_valid_fourfig(input, timestamp): + imp = WordNarrativeImporter() + imp.errors = [] + + output_timestamp, error = imp.parse_datetime(input, four_fig=True) + + assert not error + assert output_timestamp == timestamp + + +@pytest.mark.parametrize( + "input,timestamp", + [ + pytest.param( + ["041014", "08", "07", "2020"], datetime(2020, 7, 4, 10, 14), id="mismatch day" + ), + pytest.param(["991014", "99", "7", "2020"], datetime(2020, 7, 4, 10, 14), id="invalid day"), + pytest.param(["041014", "4", "99", "20"], datetime(2020, 7, 4, 10, 14), id="invalid month"), + pytest.param( + ["041014", "4", "7", "-1234"], datetime(1985, 7, 4, 10, 14), id="invalid year" + ), + pytest.param( + ["049914", "04", "07", "2020"], datetime(2020, 7, 4, 23, 14), id="invalid hour" + ), + pytest.param( + ["041099", "04", "07", "2020"], datetime(2020, 7, 4, 23, 14), id="invalid minute" + ), + ], +) +def test_datetime_parsing_invalid_sixfig(input, timestamp): + imp = WordNarrativeImporter() + imp.errors = [] + + output_timestamp, error = imp.parse_datetime(input, four_fig=False) + + assert error From 88a44d4f7e427e4753e45546b9ee15a399d51562 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Mon, 11 Jan 2021 19:09:36 +0000 Subject: [PATCH 03/20] Add docx reading library to requirements --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6ee176355..8d5565050 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,5 @@ pg8000>=1.14.1 setuptools>=40.8.0 Pygments>=2.6.1 geopy>=1.22 -halo>=0.0.31 \ No newline at end of file +halo>=0.0.31 +python-docx>=0.8.10 \ No newline at end of file From c94ef34894e79ad73c2bf192663ddbbce105f5d5 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Mon, 11 Jan 2021 19:40:12 +0000 Subject: [PATCH 04/20] Fix error in example file, where hidden text hadn't been updated when day had been changed --- .../track_files/word/Narrative Example.docx | Bin 22342 -> 20577 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/sample_data/track_files/word/Narrative Example.docx b/tests/sample_data/track_files/word/Narrative Example.docx index 38b3850ea5c5d74a61a27f98c77bcd83f5e78d01..5aa8b66b684f9e11deb85d7bdc42d4319c0c9087 100644 GIT binary patch literal 20577 zcmeEuW0)n&(r(+fZQC}c-92sFwr!i!wms9vv~AnAaeMD`_IG%H+~4=B=c%Y(D=T7E zMZXc58Ih6l(!d}n0AK(R0001l01>h>f~kN20F~eX0LTCkK$^m~HcrMiPP)qOcE*m{ zbZ*vG1oJA0^&|_h>D&)Chty>bL5Gq)@`E>d2#m zb?ZwS-#y7>unEm1MTf~0cYYq~#G+0fnV8Ew05Y0iLb>F?tAojU6s=e4(a(nVppGIz zl>}YGK{W^T+YFSD7aUUwj7Xd-_E7cPjk_nHs>M6tcTYl=y>iFGA-I2Gv&%-8+n ztv}PB56NmOn=q~zkY#e0yKdmkL^Ao5JFZyzt*^Odx8OT`uVAX~sl-viBlx|MVy=tZ z?R_P#@0US-egXr?|BF52#bdQxet}f>s}=goo^>6JtsLp;{-FPB$NwK2??1hIWkQcd z9|MfQW#D_@bce!94@QA3z2Vd<#xgjBrsOx|)g_C?k9VG>B_Qo%eeuzm*~BRi$1E}D ztt9O$tP~a4hz`hwXPrLH*LF96^VoA!8z({q}h)FE4_@}V6 z@owaODv^i%5@>TG%E>uHl7@mDdzHnNbjJAw3kgk6@N!J?v$O?ozk0R*w2z>!NMx?BmLX6802}}aKv!D_Bl^D?iIJ_L zv-MZk_eY=im%#vjb$nlU|NnhdCP~YEIsTwaiMP-p*PTKE_flDUh%!|OBvtke9uX_? z2ehHCL5n2uyn2qOR%G{s*~;wk2hHYba*S28-x65lBtR-=)1?vdEpN{!IU;C3k=o~> zFphf?tX*Zm!IrL0NB*)t+s+Ml*ir&JOu z%hWyM&ia{GAjQUHTi@C*xA-@H(PRSUIN8VLUVQ_hZ4zUch|h$Ii`SPmG!;2|emjkO zMnlkUa3RA$#qZUY2*PSr`_3@70rg#wy1O&j7;tbI{YOX?Jset-m#aClAkHL}H9}!a zl(PhuywFeV)>1=dFNl`BAN}+pk~cP_kdpovG-Dux_6QxLt&qc4ps$PkmyU!l0(iaE zBKi}L=lOAxScFHzpX{&y6?;xOdYy@I004Rq004+zkopsQ|BSjbZEMGTE|d=+(+gj{ zr^Z$D(aZ?0indO@)t_z61?OjnC5hRzu`rNZuYWx$jQmU zYjr#M+G1*h(w2r!!$yi|pr)APjXmXXueW`ircpqV049OpZuG>x{6*eY-d}6zE1HAH zf}uMEauM*%YrGnWCjz@|rI8p9r$F~+nq;CNGC)(w97tI*m&ua8Lue*YQ9C@14-N$d zO?`lMYXT7s*c~|Xu|Nl+T8{x(CUAmt_)TKN?3=~?noG!N6F7ksP!?8M4xWh#v5@#y zj+?t8DWF`ATliuw;hAKa6;gOXbX-Dc-X<|V8|N}Myv}O3CmgGjMR*qAm$O4Y4EnU# z+m+kR2XQ98S{H~OLCKDUq378p8yD8BLEM_*%o0HNb0{Fjgp{I`n67p)W>vRxm3ewq z;q@u5vD^JFHUq}+5W#{@|7faC055&@!y}}vB!4>c7R2Z|14(*O@}$a(^w5cOFDGPL z-Ui?{3_rH(R3xD*rgVrCLm%N3&0S>ZAzflek$TO^Yi6ntOm+BJ013buJXgOr3y#0X zYyczSXL=sY-~=ZtQdSshh@mam_u=Eg=3q%c1UDQJ-uS@wR?6tNl9u#yR557ZK$9-m zbCcbm?=(0ZK-6rXcS$6+$R=FieD^Zd%bo5w2w<^r$zfOx0_O@4n0mgHRc8o3!IMCa zAO@)+^sz%8`rMrz{#nM9)IEhAOJ=?6qM7OZ^W>T-s1Be7*3{qKrpm7QA9c;a`f@?2f7?I}6 zpAk#gM!W&)^&l*kfh`WT-l`=-h|3kXnnJv^Mn}?zQSAl!*$sW|fhVcTHf+;Wi+*F9 zU!x%{1y49s>UjOzR{T}m7`T= zfONlvXud(FYW+%j!9WU2l|R$@OoP&dnh66LhZ(zr(ecvAe44T^23+SEIHf8fp2FRX zDS;0>MOr>$y`pIh;y@1$F||e<${WC`o}_&4G4#>_S(e!&AQPrIp?GVz_b*00q+x-N zlmQ3Af$Dw<-*xriYQPOWrXE%BHTSUf79tg?w(G8~AY8?&7uO;PgGVhry05-*C|`VWuVzqoD3;Ch1IU8CfhWbZuEP4Gz!(@qE+1 zSr#?0Bh@RD1GBaaZaZGYMj=&3t~c9_y4p?Ff!@FD*XBa9sG~t? z>@ybb&b$VsLGBUB0Urm@NEJ%H zry>#;3Mm{ZLuGV*12*^-Z2HYV7ldS_7&(A*FC*_ggEoIvI*VRWJ7p87)xjb8 zNGsCjhtb=x5=Ae-&$D1$#aeT_i0_mdg&mOI+N{blHkbxvc{QcpD%i~@-*Pt7rsQCk z!Ab5S*{v~1>*VpRG~ElUHxMYG<)k3f6`=>_UONbLsS8pjLdoW_fr3>^J%n?EY$3D+ z;SIL-3~E8>A#jG6`ma4PH}E)0jXH*1bEJ0}>qj`?{bTBLS$&}1K$XmSGiWEmp9b!) zsFzz+RzBynqFy|Hge#GHZ$y)bNz$NzD-}q+Bn6W<=`!8Qf!7~ykp6RHzyX|%uw2lv_ROu;l1=dnb>h-LeFM;>Km+G^IAV# ze$qeroEUm6X1?97r==uhFy2Egc3da-JWr--dw2cpVka}+lTdR^nwDK@15}+aUa)0D zx&HJ$HOoB;;ZZ|Z@jSPv8K2(!6n3|NQE^K0nX2H62$tP(G-!_^p}EuY+H$ zjxdOjip}+4maXtnHT_x!3}_J9UQbmAF@QiEmTfdgQQ-kk4NkM zG|ZR3fkEztFO)&18YDwVhSOwTF#Vrv3)H5O2k|VUIzg)51$A zk$DqLNQ#AIk`PE*aj%-WC?^~#GtOBlw$9-TmFcTYkFLi8()Pj2KmEABDt1&SNaFII z({I3x{;iPflb~#i61---Sl;NptW}BZh9O7vD|Vci1&Zi4z{zzOjP}_TPSUZlab%oR zQi7g?nWjyfx2Ew_EQ9K36G#!aMX`z%3)XDI1ZjAWzck5thZ8qZlqBZ>=7EJ{(}_IC zyXJS;k(xjiXQD^l$amUAGwr0waC%bEs#s7N0TAR7as6+CN}15Mh{GWD2v$X5`lO5ViM4TS=gYl8tdjp|T#fl-AL_ zkprbVkkv35MH|bobWYL?YgWL#n-9sm&zw1VNM%nKcYyN}2q1K? zXk;S?%;<+%>B?e4QNqd%TUSCEh{GqNfVwn3h_R3s=TO01v>s)0;ZRw2I%!>N5>&-z zxI3sN(@+l})389M)kLP1X=Iu92o&HlV*JFZkWaj$N2+gSC;b|FIIk%jB1O|(8~UkX zyT0OuG}1%Bb6)^l5`5<}b5)(FQ?FUjHXOXDR%5GH&Kit{wsy5_(_CE_?HR6%44qSm zXGxs;Cd3@B8!QQ9!P{#40NwV;G&C@gAr+S)p#Xu#S~s7}79oQ2SC|lC^aV>N z1+<<})=Z809+I$6KZmGc%2T~ILT*wBu^{LVTBLN+8tc`u{G=tMi2S@gB3yiR~c7@+Xi$Ik0s=9d#+(LqPFQ+t7@K%#uZ+$QO0Rm z^`poo|D{RJYujK$sj*s_{@{z6Id8ox`?EYl!nmQbZ}OHRyg9^yI?l&1*2i!X&u0Yk zO}f3V1a!yG^UeVg1kw9*?X93VI?Cqwq`OQ{BJPNq$kmFl`}ZiNh7q|ge=ao@=(Z9E z;3yg-6fh-_0b@qJ@d*%Fubf=(1Yhnp%0v_0@%Nn(QklKL{=vhWWm^#rQ+$Qc z{<8jWeB8N^8H{BTOzA@oJnbclx1A8)ABgL+;Q$?qoPBb$z=QQGLB=|-e#T$Al16s5 zdwIe-*Rhae0%>V?&Qyvg?sh>Tv&6Zw&O!= z#zfE(27?(R{)AhZ6#zF`w#vPIjwqtIMHbb`)n?ZhM>@?RlOx6I*s~lmPR(n_5~g`w zaBrvR(?Pz>qC~bzsd4fOH=9tm)4e?rYwr6nnG!f4bqxO%ibg()^^Zew_AEBLY1^CA z^)e0Nwlu@GLJWxCBHP2GgEue%I}H_Cu1Sh5Z|$q*w#P$g2CGfn_w@=^csa!%7^S`J zz+gj0GD?>66zERvrZyA|ac!*tqpB6*FBY1MC*bPshvAhIURPJU01-ZkHj;Ufd z<|kR=X|UpmgjfeRAgNATC3V}X;c!BK4+RLJ=vrzZ^a~>KdLI{4uiv%2lI3;P7*a3j zCwrM8R#) zmNWc-5v<%#*H?#Sb{3P{zqD8{E1}Z&uUuD4egBqQ6fmyzX{SL1Nv>pvt_cevWx2sf zxOB$I?MS8x*E+)NJ`uGtLfBz(e&A5=r*x?ZS~2)c19WcUNSq{@2z;PRqJL5fXUGK+ zOpkrGH2>|L+Pj$lQtc}4BTfa)PvBJd&p!U&ql<^T8qA^~008=5(^rJQX0T?)`bNeM zjDLhp`qMORcQ{b|HdR+X5%$ep3)vvIk@Ec!gBwLH00EWGFES-zL}Ye`*jjw~2Y<%1 z9DnoEriK6c{KTz_uj%#fa>r{POX(;%mDuE|l47TaJP6Ci`}}%yJrPGbps+;{3(`vd zgCVi&-uLtTv}cV=Iba!(89r}4u_Y_CIG@2Y(!2k{d$Bkup0%qbye95`rK!$=U`_$6 zyjV<*kd(~?Mj2M2R{Wc+-9sJTc-|2}B4};dlBNhJUk>KV@3@p1l_;iCdr$}-D>LIH zAI9=3hzhCP0~LbWauEw{@p8LrZ`?8kGUsaIW8(yS+%jcZpP${5VZaz#$S1^bHBJRj zVV#$~+yJ3rZPbg7&yZ;r+`kFf-MV^-(vlIq)_*9S?J3r47HeS#>h3 zM;(&4IDu8|`tlfyoMp!X$g&-~Uuy=Q4)kL1)=E)SS8F~E z5>9lW0JLCQ9&FCpn+gsDX}n^6oG7|{0!%~;D&eJprncQutUqyzTOpGGpIN3tZ{7Gh zuMa5Uw1VOJbD=r>uX#+-vh8M*sG!8n)Zg)y-wCZn=@t8bi)6jt_n9G{JI{_4&NESu;9Vz z6b|e0;nNJ-lA_;4aHSN--l-`N7Bl|(mQ=uE!jW(u9gK>z$KdB%6CA~IY%#AYBY)2J1sZo^w!7|oNE%yUTf~tctli> z%^;gPrOj7XySz0VaW)C^3CxdyrX4G;UQUy-W>A&~59Qa5L~f7go!_i{A z-EfrXT;>9$KRsVuv;Zs3hoalLh7-A_7-UJohw92OK82Mse~ji@D|_?G?^1hmUgmL$ zxE=1+`*Tf9zT#sirB*uh_j31WIrn(a%>6a~=bL(RSOj09SO=VYj3~4rs6!Sp=}TbY zS1#kf1Z)M==ya04k{Jll002mTxf~N)TPGV^Cu7GyoX%Lvu+1t1LdYfP3mniTTa#NP z0=L<2^H7z<3b-D{2pJI_0p+OAYk~$1s{^e1&pB{Ot&dpdzU{O}c<~f>@O&0jFW7)G zc%I%m35w0Co=e?La_TMfK(_#b$a12-bI+K#lMc=ax;W#F!{ z$Mvz2!_inCpfX{&X?>{VG^skFx^ChM_jNfjtXo=74Rmv6LrQ zxf{RcO=Im#5kHzD#xLe+5h=fY0nA=|a(9U*`Oavl^B}RZ(r@?HxGR*mIHqBgxZLo3 zl+3-A@XKD(BX6l(%5|#mU)uj`r zvml)5;eg|Xm&i{<0Go?#bsa`5@Nz;G8Hyj#4d}Ro0oWtA^+H|k@BzMrV7F#J-yWP1 zKSCK>k4Br^oUlA@ztZUcnRL+Mcjfs5!%$xs{-40Fv5nC`L0H;xg#lskGW*!|>PdDF z9AgJ^4FDC4+tF68NnerX z{fg|0mnv*5ZHexEvnz)TdXpI~66R2Gs>ru+pi9^L(?zvd*+@vkf=2)%F(rcLJ-dlY zbSMsYe`(E5tdYgR&^ByO!iFtMURf$Tj{+jFyr_|64nyq{Fp`(eCy+Q)zwz5HQa>Dr zs8MKBQD_XJ4s3d!X5&#{J|e*t1JrBjlZA3XmJ08b6ADD-NxaFf1bUYAKzu%*IbRIQ zqhzFgp$^xzPk^*2D2z?v!BTI@22sqtXC4{A_oaIF6lPBllek8VvHn+xNC9V}D~`4d z25~JDme__AqEw$e(Ul`|@(yO+s{8Q@l_SEB#e^mj(30NoIm9xKVZkUSgLB70w{(6RqJi1H!Rv>29sRc)|W$+YU!1v=v8fj5o^xjNXxRn7U;s z=M8i0+=He0tMI6hGs^?(ilb_flq+s)zKvKu%wbg=R@c5BB1r@I z^O4FIglnrVR9KID9D}=cofpw-zMU)hPsshP?I)NaeZNJaC@I7C44H7x;sHA? zS*r2ap6vPFF+M4|O_kh1^G*o=itevIPGik$OMIa1#z$wT_kS`Tr!#- zY0tvz-HtLLs-<>ZFptYMY$G}VX8Aio^6^B08wXXN*X8~*#quP|j*cJ~h_th(YF<^a#x z42wvc5B@#bg%pHlM(0<|tVVyGaU1xC&mbpDaIF=VM||PK*rD&hu4J_>98H6Z2-V1K z-MRL!W>JSR*a zCdxf=(@1y|bQW|%pV0`ZFxJONQ(v6das)I0bm4J?iER)NXa7QiLIb4i7N)0$#IrBnNBm^2``SbeIqXQD_)5M8Lm!?gR=|0WsnV=U;-8Est0{gN-*wpB; zgwz!5>07l`^WPV^yWwOcqF6Iqn~2>`>v^coTpHVZ&edQJg}G8-uexSg5?K$9nKdfT zcjOFU3eMPphK{-Aq2tQ%vjMta_m97z$EP#QAnm|G1 z+MlC#xz67Zyvcpb)hZgF&(ugo_#2fkgWUJOru-Pzg(-o+000WH0RT|{8ofA~8Cx6E z|4ILxHJzzT+ainObg7=WVw~f?h2bVZfd7txr3K0Fm$%2cjBy2@yr?u8fb3MUt5hzd zs`8*vwB=Q{ju$uy_Q_~ercHi=dY?S==rD}!iBrx+!*9QRdf$xAuA|rah0W|F5>&`z z?%s5(0w3Rh2q@E=;@Ysu-Lg$sW{0FVfIM?UCgPFW%>TAUw4sO?4hi%kVzZrsPMK_@ zLNya=Ofz|#L{iJXM>GIcOjHR1gb%!#rgAcW;z2sVaxEWVsOM?OQOGCgJwCIV!*t2? zB$YG=&4(PC!CFTF!-g?`CoF3e3h5N2J_^hU$bXU8HX;lo4T?=$W5K6m{Ay4{U|<1h z9bRq9$Ehug5OoqF8x$B&yI^UhvqbxZ4WJa+YSXfMya43`=I4!1Gwwubh}{vb;W^R( zG7=Km&OL9^q`xcuwup9>dG6Bo(|jA20-Fu(3VZ&3Kz6+sTb|8|?qqZ0*lB^b9niS% z9@Oiy_rRlX09(G_j7dY_)D113l0bN(nqwOqU^9)Z8%BlFSXJytsqs&=U2d&A0KI@n zRGGMpLZdM6Zf83l1Qbg^x5%v-+2*~`U{YFWN!=;sruJAR7*xi+GxzJg-vx(6)ed@HXpE_@esN6u_@6P&B)FC)6{cOJr1=D(le)B#+SO28`DL z2gK=?vwuFh3{vjhu+UnYiO{e;4nYtRiG1+X*-Ea2lR5`dZ^7K z`v8d&1pLy;*=0NN`Fzk`nKRi*I{B-XGe8c0B}AtEZjQGw3b!1Rc(L$!6KM?Eqt=Wp zq?BW?1QEv39*quVDJp*J1i3uF52A#i*n}jA$rM*A$pKOoy5VfEihb)nufFNvP7%_q z^1&^RwkDZA6BL^ndOX;L%1nk?O=W2%cowq4xEr--KFT6;1}ZUoW{mieY!0^fr6g#@ zgBKY#JOC52ag!Nrwl>Q>%qR#rVNhbV!%SlQ$r`lfICA%7hK+SqNkWrU!q`5VbficFj21w>#bd{US|dQ4;pKoM>huKr0E)}%DFq~ znt<`yva5`ml}ba}unKlnpZQ`t8NnB5EOu1B%xv4&H-P-ur8|*u0>nvUaQH%LMr>Gt zLo3F_Vs&Grk1E4C8j#>MPSe!LU6^cboYaG?cL#ghi!(2y=U-XkaH^_k(v6D0hSHXM z`PIGbz>O|@-_6F7SRUfr>?qH_eFg&XrHcsvdWZk7W34hXUD|}ND4Q1SKTQ`Ljh&p# zZA|~1@iwZi+N^RQcOcvXE=9!4*Ur_x(=<*quEZ%V<;~bZ&MLTg!7m`1{ojY?7yNeM#@ium=Z` z;*Bq2>31GnrFMM2E$1%QFXD8O`s&;RKlPjqtJXM*0t7f5C-8}K!vf~6TBBRj(5+i2 zsOLz8$_6=U8pp4gHju*6J&ZhpilS3m?wdWa=Af=Ru*LT+gjC>GVKrIG^AmF;Ljojrn|l#VE!AR#5SG%JIr zw9LHdV@>t+V!E)is8|h7D<5ajZg=MQKqt%Qrn4{>cq?(MPJAH9a2S_f67gD4Kv^oi zH6(8ya2UZ{*aKRZ8yjLk?rbHY-t=O;ir`V`^_W%npz&^ZniVqu3eKJQ5qlniy4T!3 zU?gAcD6WBswR`H`iA>p;(UOv+iHhRb+x-gPjVFdYqDeUj;*O;yOo@JPT->sXfof%T z4cQi5@#n(u5_$PCxo1ICfwu7_X)c}q;Fe4swSMW{*QH>8f~MM6pO-U{DhVnAQSd&$M_*ytk1)9YKleT=)007>qth!ItevvdlFB123J8_9A=!5 zJ2At<;~_?z7>nGCo1@=Kf5li|&IIn##+r+f?*vGB0N`0T*8G8~v&yJV^wZoO8`k|h zl=kOC`luGUnomGQLFIIZh_QC9ENNK&K!TmOcv@lwl_n=O9Qzs~$m7&ucH`@u%_QHv zJV5jHI-#b-cl6tfl5k1ys3bh<0I`j(Qcus#IlN?QC3d&)k$So=E9591DK<@93})B{ zpqxsV1nU>rg%-80fisYOle*k>7!CE!;g~xia%fHUJtGC`$ZQ%LV_Y%nq zL@s}9tY-~*s)yEAn`_m44`eHTTvd(M)rXwDW?xO^bZp=6y8Cgl`DiSzZ6{}nZy@ww zS_rV4kff9t0qojakJcS5D_21X$$ZnSX$o|5m{5fZvzVUb1I=VQ{RTN0oc?dxxM~ZX zICLk*3i-JeDOW>NLR@F?k)Vh)TwRQpxuq$OyQ9wpWs&eY(vx;teY*6!=KK=Hffrtk z*e;3cEEgGM;QWXz*!)mn*H2x=4(q1V^M6|k@b75?I74~{urKGe?+XC%hl~4jp_`+V zyVc(Uc4yhzj;pPxA(z~iZj+t@SGTFUKk8Xt1S1o!-@I6SI`|{Z@NR?lZ*Qj;Lr+z0 z#T3-zc?c;dkDrgyQwnr9-|y+Wcyf~G9i>Jk0B&w*A}7To%QbS&&y%kMObWfmQ(4gY zC*0!WrMzekJTWOdYf@?qa%d%g7t_Wc!3CDMndb{sNZWiiM2@NiCkZIzEDfE z`x)YFeJ#*7cnLCWpN`^HnAxiZ8RS%VR5P$!!!iVriV((OKd1P?yj(!T*Oq~tWP{viG zDH7r37_~>N2z|!G=k^k5;zp`et(1OJeO!XEEl4>f>&g=;nXqh#l^zrzfY*zd-!*6c zTsdi5fl`^?mR51PQ$)q>QWhP7pjVhamR7ktD56qK`=O3Fiu|ICjrbk7w2cFeJ)aY; zA&(P{{_CvBd!S|yiD9R~Y~Y|GAY}u#R6RjV0E6XbK)Y+447quCbD4Es>fu)1~P%2;X>O_wF)6;9k6Q zP$cQ2&_g_E*iBSF=%Xzbc@NTn`1hCC&wI99xH3rZ0XlC6kv{S9~*O(S-fBa#tk!wH8@@em>r&6xi&S@SMiht22^^-?^ zYdr=s^HAgr@kx&Tg~8HE4vL9>$N_N}czIo|VRFfX&iw|5Hz;j(p-^5L5S2(Zf00J_ z8Qv@sZA8mbhrqMS-5;j;;92ub_Kba$w>dM16k&j}!hGIbLlnzs1yWM=9U^S4#Nq-Md#3Q%( z0PQ+}#BVY?k^~7$<3^m&Orca8{71J*iA0dXZS-c|#&V*-h~y-i3*5MX?W8w51w;IR z4zu*XFP-s+BC^V`G6dL6$+|Jk|D>9dWJ{d+Z zCgm~AAvn3ED#O>joAxmzQh)@KGQ@t~ip6YdeIdd&8 zR_wa0hEw)au~wa>`NIa@Pn-Q&XO}d~m0Oy>T~wD?56H)P8MS|3nDu^FpOtqL9vI%f z>ZR(}432d{-#F7~l){6jfjs3|)^=!EU88d6swJts9q&#UX{}1|_HmPoxXkvqTS4bP z75zc_B>S3grrp}G(kY;kdIr@q+eu-ePWmg%(ESCVoOz7-;ihlQkUrKvK0BZ;F`H>g z)kWk-Pv;c=H`)8N!m<-hUD-PA^}Lk|yd+Lv-RlA-+Hh%H02`G^ZZs64SwSx@LzaTR`yVHI zflx%G0wJiia+m-%D$$f!@ZUKHy*(}M$qR&_R!R~4|Ku!xKuVXC;wTp&DLHB|`bym3?@KWbiC8%4qSCOCYGs6g)T|^N zlCTUHio!_^a|runtQ^guDOgCgio!tOEhOv`u=Li7LMTkj2uEZrIFj{?LO?3#;{1|V z9EF+<{?TIR1xU+EqW8DfEfUu5R%uv;i7%O%b9%uM|5aYOoP{$;0V%7LegTIK`4~l5 zW?XbY>z);glNv2%5w5B9e4`lVwO@<3?n84-G~){d0=OUY9TF zt@Dbha~EPw$hC^Q-=)8o*;f?l{6$FH{?`1rdxn=~zuu-r<0SbnQZF5?N$V{64|2{) zsL6QohoFu>h|3>>=>CO}R{SBd<}Z?*-en?%mO>kT2i_c$&0!DVvv2-3mD;KQ- zy9}b&(1bTL?l!;LBv#R~o(%Jp!$vK97o%{FsR_J!LQ-21oXX%Vx&*@)nSU{am^b=nEPq9$@T#>2e@*j$d!YmD*?HP0XC}di<#}5oQj!4X>+Ur zs*o|KrZy+4ka$)6jk=^yZUaf!#?;QY{UxBohu=Em8qIBQ2Lwkgu_wjRK`Zc2Yu!b6 zfJe*(S=h2|b(%OxtRI8g32$Ad=J-W3 z5tbsdCMs>i3$z*U#IaUv4v$UcY~k~LsNo0PS&u&_UK6<|cg&OZ{7?0 z&h6SYt61_PKqQ{Pnk{*CDSanbm?!Np$*?IuR(MANzjc3K7|co#lnZ3Q2)QKt z0H5Jb5@JdQke)l+OAs|RX`g>sU69b6Sz!F=2!>^ICP${*_wFRu)g3DFR0ri|_->{` zfb^RJ;5Q4pg;MU~K25u*zq7KvhB+vxMNhuDyLWN}m*Glx?-5oMvL3LlBAcPJfAhpV z=0J+Ipdz-r(6e1tbm0X4IM`y6_YCY7NesDKKtUE)1h>9gteBIs0~^&Tz0y6<1cNex zAZMhc$W^$rUSS8v6uD_3N_EvX5myj#M~eYSW^s5%qxky=NZ0reotKIbA!lty0pkhv z0T}rb8FNkQvXSfMit5e?4faQw8qllPX0HW&Ay8@s_4DN}cXk6k@?Z2SWW0=c;Oo5* z`+ljCS#b;xlSYyfDI3Q2Z+u+f6#OpEYEW)lU)AGSm`^Wk8+$j4EZK8z zXVx`p} zEamgtT^an$C`*1tvDi5;F}%0-u-dw*m%BuVt1*1CWY#jK2GTlkJQ*K!Ik%Oo_%VV_ z*4X#DjL&@rS>Et4`Wr>(A^&)xeHk!cy?h&qOcgf@bN(PevqP-Hm_Ix~j#nML(ev5MVvQ<=vRuBtkKn>vtFByM?X?yTfD|5QS+ z`4pTnv~F;bjes@qV|WFx5wJlB7b&bYd0-K08Idht$X4<6(6(eQY@XtIL=}g;h!^x| z*NJj`CIg$*3|!->88WscSi#unSjsRBa^V%jwZ8hwevf!=Zoko9z9U|QPN&5m2>2Sm zG(GyxDp^)NF)Gnqq)ZQ)KNQwmC#0oe4~X-@KBHngU0mHcKklv)XbwIqXY~(@MrPIN z(D%VmD3=f;VWXXJMkKmXur{b^*NmieyF^T5*z~<5cG$oE zyZ>nuWLj=F+g~jzUv_~3KmlOr?C4}`t!8aSZ|-Dl{inD7GjmRmPXa)ZOYnDu{NFEG zaetP)hg=4B2v4my;kKAl*h@86PMt30sxI0i30r}$V&WQJ@`Yyq+Lhf&jO)Z#8O%(t zhu;r$NdaC*7)na6o^{!}`fw2B748) z@I+KdbCbVJEES2%#cQiM$HN$*gJCK4Yn6H6#&#%SrI^{j4`bJ>n`1};6Oqm4I~%O$ zVRRVeyPx)^Wtj#!dVy6xCn0Ex^h{=Cdalmxn@v|a-`Xyg_4P|vN=2va3arl9F8S84 zUSDPE&5rz!F9rRl`b<}t$#TE6@%D8h|A+eipEOrFDF_wFfDpW05roq%fvz0}CDc|G zETFnhV$b4FLOcxN@d`%CE125%`lPS1F0q#2KStul0FqePCp@^>=NM;0?)d|UGisNI z8`$uUPN>Orav2hz3Pp>?lwRUzcZ@FQYGCZoY1|eOOJWf=w15%RYJ^cXL#T+6bTog{ zrw|ip1WB!v7plhDjqQXvi&)z;`m#>cuF%*2o;h>k=k)0PdTEL;wcvgE(tqIQZzcYJ zpy$um|KIxlFD?CbL`~L3d~FB;w)MWX(PL(twI)A;SRzg7#4QE)3rk$`o8hRMh52UZ zK{2({5k>gtt3q2xu&6d;Ly1{#nT0$CE-arcn9u9G1#ragfgU=P>26%i=Ji8y&;H+1 zkk@C(75O4;43UMlRmPs+!AA!8V1wCn`IGAz0?ys97FFABj9MKNNo)XEOqkuDVOansKbE@1*`>%;f4!-{D3=t59E}~qEyL{ zoOXT3a=@hugGo=jTbDa5=!iKzf{8>RSDfl>%U$8cX!maTiX|x;t@u9+J#rkk*{2lo zPR^1Ppo(E`RY8t5yniNICr&8gFf>f5wb}-SXjyXqn{uw+zn@c;>am;+~T7HMR1x#_plc4A}9aMc$SYLkfZHkM3*!C<_CPR{@Y zsE~sAOlm%5(V%utqVpWWt-P<}65a#~ei z1BNCmP5??Me*=rz9_NK3iVXt18PV+DL=8h*ffKn>CU`sX=XgJAPgEZZGxkqBgV|G= zc4gnZwDI8@9n~Ej-5jl`*x!#eFy~HpR>0PfnWHpbSRRfVjVsq~qcCv9y4Vn+SRz@# z!fO&AERZq4&5YQBG`Nags**T^aOBV)tv7^==ixk%FH6Zz#U+E6WQm$lqgG*y*G&d7 zI!uB@8Wpd1e7zq%zi+9ZODkxxd`y#+S)P8?EiN`Y>iNf-Oh6#ouleDB-@WTEd;aVC zU-t6)3cUY|fPdXC=r6`Uu2Wy8^-r4z{X65o?xXV0j8|Wy+&>e3|Ce1={$0|)?~3sc zSyqt$Wsi)1XaDz-q<^r}Vf>x_p9+-zo%`Rb3;w~Khx2#tf2lS2cm980+5Hdx|FcwJ zoPmP@d7U@9cJx)X2<;k5NY+Ew*rMx3UtEaL9j^-2jlR?n-3auhCI};@XhV%aUU-78 z9ett~p*_?9rX4=*i>?`c$OECd!xpL;Z2$z_1k|29LO%n;XFCQ4jD9`3cGR91vi2rN zB<;|C7`krMHY&32X)X*nTde4&pf=TzO}XldZVI^Nf^Gn6lLy&=b8g56U~2%On}uF+ kB22sD0gaI~;PHU)YBa!`6`0vUm8=j$KXAd)9&Zp20MFznH~;_u literal 22342 zcmeFZbDJg0w&-29ZQFL2ZJS-TZFkwWZQHKua+htZ%X+KV+H2o)p0nTk1?91SUYgNRJGJsGLTFqSznrzTf zj)V)nI1roK{QOvOzObC$D`~m^y|=hKR6Idj_nVdykKm8hj&vPmXjQ)iUGtW*m74p< zTWD$`sFI)?IE&`M(P~wEKANQtxbFgH=$5~TT=Y0&>OrZ$A8^~Dn0wU%ov>QDk?#jC z1AKO>Ydf6a1`Hq}gKN?D2oN`eMP!)1@Nf`vP*;aKN|c-ApG77EjZw%c_@JQo4nl4h zDAlYdd77dUdfqOd()p}XuPa&vsLHR0`SAsj;=hf74<)5&Nx7C5N8!!h$rO-AJ`Mf? zr6&{}Z)07=s1xj<#CO1+UE}*@7o~p{aJ4&R#Z7F!8|a61+xDZC8NpNv-{|{xmZX4Q z_|CFz)P>BvUm-r79y0bsP!(LQJ1c*oU>4D>FLq*E|HwDTU;W7IpF9~6zM~DISXZ;| zJYpAOrok={C@rPyWd~tEc?~5p}%^*o`bQKBR$=p^Z)Dd|BJi%e|h!D_&!NsMud3%*WKbx}0nZ!Pt8__q= zSk&ieJ${|hbX)mGciD@Qw{8BzpEk3#x45lalkbi{U#93xiBS@ANRT}K5;5Y;tx$1j zP(bg2F^0rrl9tQ?+yYb0**oHy9XK6w-l{9O^InRfLuqd8vtiF?i`(bNrSm=kGqIg! zp3yjMMDSzkx0jk>$t1udYt@vsw=T+Pntcffoyksd4af00%X&V+)7!$OgNEJAnW+Q8 z`Gcd~odUb-^J!`Y$t_WiXV59|ulN3+og}79?Zo~oJSub7}5XTQ;cj4 zovptj<)0AwPp<*|ij!Zp|92miNqw>d3@{;A;+;ZC+%nwyz>PNO{Hv%T+v!EJTC*f7 zyMUga{p0O6)SS86R4WQTzR!ED^Uo=j1@H{bb5z9UnXwQkOYTMCk)1U7^fux~ab=l{ zngXJ1`eR-$K5X6r>3M5L6XkVrN(yYmq~9II$__}yO$;~u9H<4c$7X_yb7A+cDgn48 z;~81KVVSsRBGKGMCy9DUW*z2R$Hi9bcSW^m84fiSX@!Sc$`gcCEbNs6y{l@y5P&!| zOGcytQ{xhfmiv&nChhORM|v@}Qaz`ZkWk=ok~I09;2t%6n0bTFVj*&BJ$UnuNxcd7 zVhIE-m9cd6oYCitybuy%5pr*dLv)5iw>O9NZ9gi=CG|N|yCAJIUrx6|DnAW9dVH`w z&q{1GgP8fvf~A_v4Xy-IZrfsu2`Oiys`2{R3fF43!ee2)tQ98M*(2M@5l0#)=AZ;T z%tZ;!R~g&H%=b2;W&-j1&awZK$^N*Tns0&!03bdA06_e*#J^1TzZ&bgj)LPZ2a-=u z`71nCwu>Tj7-0~C#Z4w)LZJ&91p#}DoLV^Ic0`?aGGxa4{Sp#eT_2t}88(fWJ7Kf2 zMwLxdw~ez@kI%c~POjK4b4L4=;Q9%`Ly)cHWupEh)xmFSGkKmfWXfa zUzptneAPYla)a^U&OO)a0?EyCRM(X6mT73cTM!lRjfGHsO8{Zbc<3@q8CbYG&fCGU z&T@`neM4swMXU8rf^_K8T@R7Lv7~j_TDp#WJgu!~@b5|`_((WJ&&R3NT)SINvV~GN zZ*HL6@V(s*E|gQ|Fca>jpiMX6wR5wF%sPhaT>!+~P&3$Y=7RRw3z7$eZSUKw-IEKG zz=f_&!4{h2QZ_@Vra#(NZ8kz>bhME>bB#4+VD350&qYwrkHcqiJPU)+RQFsC74f&Y zXNZpN2TAJ-16|rn639Q!2wOLAC!%s74634FocE{&hSU9oc^$p=g% z=EsjT7xzJzTDXkDQ)GK{v|Z@vH8+mO2vpU*?6&W60Ja*g(aYTiWgGTm&3m_QDdhF4 zlj)^QUusOJMbbiPF`(svl5x9;!n8uPouR)k#0Zjp z(OUt+t_=wrF6+r_&S;fYXN-JU+)Y{O$@0-Wf-VHIEX));L`;rvH?D}2`wKETKH0m(GZd2R1klWwt-P;X%g5K*E z;YY|0NbSV8Eh&D{Jy_0Y}1wWlejI-;b;}@UZ)MSAV+fJ1OLpg z?~#{jhW`5Qq+z`t3@SGjYg^i<8zq6U$LIu9f^4$#5Fe7d2WAz7i-Kl_6M*VWDFE8f zj3>G-p7&m}X|BDqd_ZtVH^*bnkypAF2?lrvKsEXS{>m(d(A0~*#eF~m(L{n|Q_*az zdh8~A|1N)%-@C2PyWN^T7zDHGyZB73`uejZSy&hXm@`W@vZJ~#E&`;$sy*zdojt*0 zb<(0m1nbG~iX}CEc!l!gm_D`{gc0p@bwEs< zB55R&1zpXJ4cf@2U@`Im5{gO`9f}j$d@or~2pl7v!NAXAvty9)8+cMn`w(s@lVJ-u zk|kDcy1L_q($Nx4pP6y{@C*Dt8)$Mci`7!LtD{R3&xit2P_>in{xXGRt zEw~hwnWVeYxU6|yGu@L@WsK-VocZ&NxJQMhBitmE1rCqa^Q|c~2XW7G7YUzR3)nGc z!sp0dAYf(wVi>~G#H0#G!m|rxNI32?Z8I&xqgW?%?U@V?=%=wTpgoWWM1S$D`vrHr z?QYDxLeJ_PX*_4@y{~bTUbwxy?01@N>2LuJX1XSal}<&lqVc-T1k7&q`H7OMhj@}b z<@%ITpxd&{qB2LA(T6)q+qT}#U( zSuswE#6Uq#3YUkb1cl%Wb}wpl_YAD0lL`BMvY7x|tB0G1*MKRy=9xy#kI8}ZN6Tcs zHkOIGhc2iD+hL>-0UO9t7%3ai(FjnHVIZ19hU-g@mWz+tP|+4c78Y=%Te9Nc^D;N| z6cJRE94uHE16$o`1vEtawS&y+^T1VhhQF(2#$3(XKU}!+G6sllA>@@u3qYJ1&K1$l zCX!UE9A`apF0_?@vs>NZUE1~UMQ}f`NMUyp7Z^qDh0YBSH&kB7%gY7r@3DbQq*7B~~PZ8msQ;6Ys63x8$yJv5? z@|}ZMD$(JLY)wj$aS4yGi#I|=42x@gGki_!ggKVW{N(sm_aid3o9glR^32ISNXdj< zV~`q11`~6~@8%`E^x@-XS6X&um?aYVPaB?AY;u!Gr=zZ4T=dv#xIP>w8QHewZ@T2woNJ4IkKOq1Xja=htuy-hMe&_eVu7U^EA0&OB3Tr>zopWF$(lm-B&~ zF04V)pxvz;{F+75lg6>4ufSa!bB;OsQ_^Cnhq0H_yq%bip~j zA(jYilS^r6TW)ppH*F5aWoZgyHu%xkx9-*t{T{Ow2$tjqS9!>r=SNg3 z**SF$GNs*408@2{(iS8g3rrpzv_`bX>u@gSmO8_((X6*vsWyH9Y}J#)7b=ArG)|xl zwzGT)+#;rGXL4_k=Jk_WQ;>Bq_lMI=-H#0IQ+(zy2~eA`i~fjhBQ<}mv(~2dSDtxd z#zF@1wc%Fpz+v`7M-Zin^M_Efo=0#dF>6D8vconf2(MYjCXXr`3Wzt+?3omd!%R{t zSAhe%PR~n0htL=dpr2KjYaJd*67fLyoazV9xw*O*(zU!G;&~puYF7{p4;E5Q%mO|@ z%&l#$qW=DyjD-E~@OZ(f7YYn)qCJEF57zt-u|kG0!>$0gp`D}b^T(+JrTc*TdL8ZM zS$fHf)w@@mXv7axwfMc^Q9!R(a1Y4gc?jx!u3}-Gyl)cj*EjhD0y5))H9Sutwua=U z>YsZ>x<|hvP!0MfSzr=CpEm1OGD8Q{^?87e>H|63UCs#zx*_}^iLDTb_XUD}$s2Jd zBC-^9gGQY31c(k!T%*&p|K35=R-@9y5EWAEq`-1t_O;u_%p}EIggn@;0NLQfMdDMc zXk5F$Z7W8D-<)lZF%n-v9?!OWR%T|`Her{_C>!}cHRqB#)hFhDb&q3yG}J&|?k<(4 z`T&AnKT4g9XI8E#tQ;=hF!&W>Ge{7Uh=?Xa6zXT&^IEgDh0 z>Q@vD_@OWeVH|frm;1>*k07>L*qlOxR)nETy=s9lAigzLD+w?Z+OgXw~?@MLkbbJoWx_g{rVPBLmH-C0zTfa%#_)S-u{HkF6(q4B(W7SQO$ z2%}oe^KB~NP}Q-g3o_|=fv(Al9)WAyrR+^PFxiiypL7DhVm@S5Ho)|8V(A|N?s&Gt zw63AxR7fo~mtNH?{f_eE3!iddC4SP5`{tj8ir_qpvtG4c6%H&_$VVqTYxXk%Q;o#M z@#1jkI&b?dCi}D0(~&db>OvQsS+pmg%|#?e-p)j(*S;%vCeDrRjt~~*mNW0Ds<(Mr z?YDO{@MaCj>o&&G1}rXTK_0mmP*{GzxBQ-=DUh6QJfMc@ns$K3v(WwE5N|jJS8NmV z9Een0!x=>VX4pWK8IIg1gzkYwoDSyWLQ|feoAxJP`f9fXhG%u(L-I^ZcqdSM_>e(R zMDhf{>^L+MK0b&?8Y`-4g$r2%$L?5nE1nYUOR?~l=2Bow&AhVJFZ4OPMUap>SSjy2 z^csZNTXNvvva?k6B5&0F!#;2=xvW)dQTS4T^F5o77)EV#pzZ$8h)fIbGzV4fR@QFC zM~1j%u5fV~M|}k8*SXoC&KbH>Be1LZrL= ztt|BH=)aZ9);{-Urvgu4e?z}{t$v%OZfRi0Ume0(GewHB`5E-x!N7E+j3)-9tVt<1 za5JvFL*S5)v>k^)Y`o6{U38+f@l2(6{EKcql`NY$z^0tCtc{4OMP(tOiZ9s`)8ac9Klo>Kozy;u=v0E5K=^-=2^Rge|H* zUIxOPj^pQ79^>=#Ty^Tt5Zot;LJV%^y*4UL8H>ONF4z1TNMpAuZnaRd9@fp{+zCs1 z&##pqF-ep2=4d$D@Q9l$KQt@HYM8O4W_tQ&;2KtRx>)y`23r?aj=tJ`P8z$nsezF| z63jSq8`;)q%pQhmP=Z16np{ahAHLRrSxrJ}{QLI-v0A9mT4YVqP9hIlslE>H?yR=L zgY>q@!_lSTw9)Je2ONts{_9mCfF7{yjNZfsG-VL)NjSl`rrG_(xSmvm=9D;9F^8m0 z3n~ul0|tGqkE(w^l7bbM5fZGO6vxcRrfKI}A_jY}#ev>qV)qMUVM1&-2D^_ttHCNZ zm3%xa6>Ez%c89WPY7iwImrO>}nadEsMqVZE#swIYQ?WZY&x*YvH@}?)pgzN$yxU zQ*6&yI_ZC=rKCPUOc6i;0J32L01*Beevms_EG7a-ewYnSTl;mTbgn zBI!&02#KU9q*a85NVRa;lHT=OBx$x1TJ`N&f`bI?hncKQ4h=_IeK z8`4Hde~^Ct#RQuP#zJyZZtA1Mv(8aWNFphOGLP~&2~RUgzj@W;{d3)U6!uy+IHNAt zw7SU1rhqVjRW`O&M!dHyH?9-bY%W(p7-)Tz0v=mtqWL%bu(@BbpFD7)O&o)(9b^{_ zt$+aFdLvCq96@|CMoA>a0OfTQgoI zk4k9CEt%GMtKstpiB83Asc_zXiojKLe`XgTHDb|JZc_W|K#NjuY+T~iU!bT82&AD> z6ga~)smdAjubk!&S&59P-Fld%VzTU&li_iCLfA#Nq{*5Xw z7_G1*wxd-k56m!uiRIVjam_tyQ&Ryy+hOckpgc`R@K%9MAv~owlAd%G*3_3bl2N72X6~CRpaQklX?H?;u2TmS{MfQ0{>}Ed)Dg0<#k(VRU z4Bu#S!x_Y|51ybfvTd<0VX0y_Z%O|JFP+)p5(Q}mv8^C%!TqZYmb{{gMyB1hsycjO z8JXN10)R1E`E;CtNPE;YU$Q4~uMOgjKZsIu=Y~wai<4Sp~z#KItvAc|v zC*_CK58+(>W~j7$kbD`=B#v042)$l$TIebU@W6Vr2k(h17-#cBE`IGkd4TRnL;q*A zjLizZ*R>hQQ%-B;*O2)V%QFx%dv(azDeCB-9+ zvjzRMtXrT8TxQ7_vi&nFCbDSp=0MmpC-%6I^aJ1b)!w1dxzb_AKZt4h;A^bHAa+OyZ(9E zI3kn`k1wnh-RZ>^{6$L!?f10#nvbobZFdJzaE?f!fvk$8E8beizchDI-x$i!Wait! z(h3D{LC>o_UWOY#k1aoncDffo8+brexm8Z7l6JE(hoI3wU^s&TOJ-%$wBb({T^2yy zhWaTN`Kp0`j+7SqIHUe7lPgHrxb(E)=T3K?m{u&-dl3`f|s71KD4m`>zkFL z%jF~afWO)O8nALy))Ij$2kR=A7xDvG884RtTmAfLjZ1K$re-i5q)V`pOv6$^s^%j{ z-`ok*uh|yGAhyLD4(4OUC20%Ai@26*xf>ic!WV2Nuu z6OJ{#dnNNr9RSmYT>~Llu{a~6Ce_us5-1G+J%wX&7H-*+QbHe7{aJe<+WS=^h%h3t zK%*Q}A_`&yYtTNc#!)hJwg8eJ9v{0xzh8HdB@GnO9y$A;F2SZ#?syxwNvo*wRS#WH z%rcvfd#nidV2o!DErLOkmsi@oo4noPXt+|AT?a zMXNfbe;Jta7mWNnJek;jnb+UpsZeE2c8>wY>nl7F-f~YB<5>nu9^UHFQI)W10$=PWNg|@u)T%(*VmFfEWc&8>N3!T)vlz*vaW=Ey zyX5I)OaMdaR$_7x7I0Oudg5}f8W)|M;a+OvpaMdqS9IDj2OO$|2_5yY3 zj2yNz7!+!=)fkkC9={&LvDr9^b)&ehPL^}EwhRzZd^|cWhEY$9HIjI`m{uNqBp%{Q z;v~t69hDXB^im^0pJFPj(%L%j1eklJmc%DZr(v_Af!9n;#8IM1j=6TuJ@o%fkv@^eGo|6)4#)HMj+MTK00sqUEJ! zwB52jhm>CMM9{#O(ko47d?Jok>dPf{g?;;Y8dX7yP8xnhlFK&Suwgq!N_bx6U{+lt zN-Ws|ca{@5g>M!o4bBDjeCgSbHT3!{{uA^aQVIyd31%!29eJ2jH^sF!CLGw=ayD2G zS~~1)E%czlr%33O?QwI8QC{qZH*=l5YvzjbmAEzW+q{tj3gF-10Nwk12?_+fK!Dgm zj?K7-3bqjnRxGT!M~8C#pR{(Qjriqf)-_yvGtCWsnOc6c;_9m;2;zcbfbB7;`_dM9 zIXs?r4Xp|?1~$G%i|E6Fv|z;(B`5tx_uGE!IL4~L;@{8=O8D^Ns6cMn8+qQqzk0B_ zaeHTQjXH1M$jCpFScn0aiF-{W5j6mkNh%IGjnA)Z(bKJ2;8ver3vD?(bQX?k8ty13 z?c=k5L;p{9EGo|q$^2!mXVX*j;`f6{@@%pedE>Y|(;&re_=$F@bBGULHQ3T0Pnnsl zc?0v0r(~sUe310KB4&uG zj$Dl72FLa(v5>-D9gy(GOfRei#xwB3m7-FPu^$)5uJ*Idt18@tZPF16_O6};z`!~k zN}ved0_5X4F=)mtuO#wR3Di|7CFJ+BGF~ACzVJP2hKqezR9QyOGZk+pm71a&OR_y4 zN>1Tdr*1(!++Gu#?TwKUJCW%V`)-*~Nd9{NpzUxhC1uI+YO}YR*_*{kY#U*;@`8YM zIwR^!O!gFj42kgV3$8jh8w?%Rz3xXUIh_=(x8Zya1>YOnnYyTmc|%=FvOOyqpf>wp z)frxIqp>QEhMGq?}Blj@Zp0?&Cdsrg|31{aC z%#ox+oQkV=1a7-!{zNyg|HoCXeLxm5P~W}67<9Jw-7R*))4B^(%!MXBe`k>;r+uNv zUgi+OY~)Ow?{{pyVKB2)4Ao zzvl8#|1mKqGh=IG`oGS9;rzLVob4JLN)P<9D}mEjD#ry2+&O%?E`wGEd(b8n&pa%7 zSw5eCPQn{|b3Q$< z6A!4?jfawzRjdaBx+~49VNmO#PP6|d7G-1PB^0sPvSPl z=q6T7ncR$g!f!T*-EUwuVeuZlrr&cVkWGX8z`&iOVwF~mfpEY7dhnC$jS1QBzC{>N z>=RzANXuzyw0DNAF^x#otZiu$Q)Z}o#IB;&0`&&gx4>IEyKnFE z5SRw2y$UQ=O=g-B^{v_*Gf(D!u+MgB`BllP?G^Sho;g`1TQYBDT(6!FW`vLJ_|&v& zVd%KhhaEe_esmr6)1;;2bQ^YbSMBKfow{lmFWWXXllcS=nKO^XK-X6}!B5HWw-#f%muVwtjjGtnMh4VA^ zAujGPP?%NiNy&-ryHN)q=5NENND?{v(xpJ(v%_(IPf?nrx#Dng4K{7#0v;4Vxp_NG zBf24kh2^7Wy5=tfUON2b(pJ_q5WAyFbLWugw>bQ+IDBE~r9RXVfjrXFB?0Bk3@;^i zVjPX-DmmJx{A8>xM|JHa zjv?0NC$7eyFn+Ler2PnCnohcLk|2-kWZ*EWdbHNC8sYmyb> zScE&Rg^t~~Y7WoO=iN?<9QtmO>2d}XvyF&l(Y3~noBnD~X-w==N@Dgx^G?$9ZvyyW z_gs#1q(G!n^(Oo%E-YQ?JOh@2!=rIy>}n`_@t1WGe3;*$UaxiNh&J(C3rk7 zyd0@C>K<2do=Uj!GbVErwL)ct64rVU>dfFxI?8oylaD-YH4wisS~g(0s{%C4sm_eg zeXxpf9I0AT|H>Smn9}CB-t8P_P-hXQTqh)a2Y=wB@-pdhIfiu}eavendUDGP6IkX~ z)~k;Kp97tPi_W#L4`fSap|o7Bbe>b4U_XPtbb{_CkE|l)j_8>-KhNiMT;XhUiyQt) zbH2P9P0PaS_Fxp=Kf}Q{iZPNpZFE4d;pGI6(*?Q(LnbYOEPb6NsehaYP6ki#Oe@yl zv`7CRL)$E$^&#w6k_rXtzYH@Sjh&p#ZA||fW;Uv>*{pFOcfyY+Z`dK+-cB zkB8VasBC3A#uroZ5n5^8tk<%Cn3m&~ zyU9^UIZ-ep^lk(!4PB^;M^S+la_txFLt)&jNN#eYgdbytgE(cHNSvaiVhzpM`=6Fn zB}erhz)|k1T+6mm($<6UL=i>0pZ;>?_d$0%SRA+qxwxNnJ<^zq-NZ;}1O4P?(d0)$|>o z=Atr?92SD0Q?aI$H#Nh|lUg94NV`LS=J4?1BJKy+uOTL*m3`cl>k~UH({2nHv6Q13w0W!UbZ*$rdCtBww;hwR zvrs#S#FHET7Si3M;?fqEd9+S9g_Dx61fU+`GTXkV%!;zY$`yF<&W}3O-)_oJ`*wUL z)nmg0^{jJia4k(UrbVe%m%6p<$3!p8k(8+UxGakPP>kBVSWRxq%bw!}>ntjmT_Dnv zSISYndJn6B#X25v!NDSDTBmqn)|DKH@6*s?_5qPu*x@E)J*B>n+nxT>(kV8439O1x z#n{U#YhK9=?HeoWZ5mJgwI%OjO}LHi{!4u+^Uv_g9U}qGo_1h65ULk>39toL%VYY` zpdAY2*kUWK2|A*fZ*m+c+-AnGjvtVRDm+}zqD*A)AfK4Q4f|0PBz>-@oplZ?KX1S_ zU&pqUjMiNMiCReky%K}b`IEYGTj)obt;hq)g89V4zei^qXcz~E0ozs?=!^|#FLPxv ztCG**iK0>MrwDuOo_p>t>gc1#rvv#;5ZAP4IU!~bmDZ6e)OJ@c#3JzJeO|YFDeG%& z^{e|Oq`(j3+t87(%$4Om?{2kK*#1Y~QBNy@NBHG8@@2oaCj5iNj!y1Y#*S*{PG%w| zCdP(Ne>stJZ7tiqW+WfJ-e>rL&4zB{J)MG0q7)PMUN#wa$d}1cCmLZSXA;iM-%G9> ziDsHiRe41sv%5T`@uSZs9L!shxH3L3&z?B$H9QkSag*<}9eGzbBnS}a_bxmB?;N6HmXa{EH+ zGMt=Nd*?@l>|a!d*|uMy+!ELZFE}HES5d@Hag6w_2TJMeSxDT3!)LhUm6wMqSr`Lt z9oJ{Ah|LoOfm|d?r_62Rd5HSkN~1oYA$ zN#MO)2Q_qZObazX?nqXzzLJbW3$2Q@LcIG6Pkw@=)zGBB?%6vZDO%Jz(tjUwbdDh> z&!Lx=IpkgU=uBzJ=D%DwT^W0CLt*1*uE+^#JBhxr3ob9D@tXu@w9>F6Ak5{VWPvJj z9Z|~VbH1^rg-@x1RtU%i0AL>KH@+D3&+sd|R0;Dd@p@JPq4%=a>kVv+QMbFhIi!OY z0n6lmNwtIWeY^+kZF(sc#H zl8TT9mak5zPPinJ?TB#O(g6XpZD~k#SpwoJsm4>NGY}@-C<}&AzD_%0cy=Q&I*1lU zX}UB6kvWSNjcmL$3ljb+fP57=I-SJ^daAgwE_qso6d|Th1z32t`-%{H59qjoLiW|S znEH^(zzcrew(enH;aojzWz{}7G=*;s1jV1y#ehK`!$#E^j_(ssddF6;k zcWUF|`=ir_@V2YwElI37evO04YU}~cRtN*nwZR9U9Hy4)E8i~BB$=4RLFp4NODF}F8tj#@ByPKS}w8kCF44xHyO z<+I)ZPFucvsVKCQ31g9rn+RoSu-5S+-J4oB#FD);3)?N@y&7&pV1^|Enj{lpEOxp` z{vy-~KsaPMp(8us0rGFPi;NDh6T|S2@)d4HUIE`lEIi6BMN;X{a?v?~4^*#-lejs{ z)69+(BBT`uTouk4NqD6rm|qAi$e9W16XbD&3!H(j$Mf9qKuGKDq7kldh3;8J7T0|of*g5Y|{DLkqOin;N6k^UI=Z0F__EYyY9wD~CnUvBxIeePw8AR5$RpP4;35wzmB0=N zy;sEirx{~){B&r4+8XhH(TPJbIIPiM55RtiXFnW;C$|L(ZwUNS7r*_3Y?M6fpEtx+ z9+*=W>plIW1&q|Ju?&D9sc3+H8V7r&uXfk|r4(oKE0@@hFFhVi!lo<9e5r|ZJ`jxQ ztM2|J?5(rCn|In<^8F|h$igE`c?;I4pJ7Bosb7)*)>FJG1I47))>^6)NF|LEe`65% z;vh7kF3+l0{~!{9QRy`$6sRm&=Z+TFu#`{2iN8cl=oVcUy~SW<9lVgm;}mx4G&A_E z9e{lFp?~&}7L<^^IV%BSQ6EHhW+qMOx!vR|nCRtrA7`6}>|CE9juwsW-j=nL@)ItC zy&*D{I-sij&ocGg@iy3ZI3!u?1%KKB+|p5(<1)ntoZICGd5evomI~nabo=`K{(F6W z!GWgxJ4gqM#Q_W-Kd73_A?Q5A7ZAwb91>`H`(-#kOEcS^ekM}5`HU4DtA!Bf1dCrt z5qHxp%+DfF>&q-8P(UHBJ(@lRa9GsK0R#n>MYbo3F7F9Xp(NF0XzlK+j*j~y_jC}J zZ8|pv0o@mdwS%7FM1zUxWKa^{W3Qml7&7@IVsfl7Wz6J9rw>rr!PyF?Foh1BEdG8U zYJ!34kl{wjP597^N?dfT$z%~?&B2h~{~!am!uA4~AEaOOCOtu_*8?q)5z3@Vc8j&+ z=!E5?#$=K-hAIXk8x<=lfH%&xRU2iuTqMD&=FEzmFd!C!8ZJk>+c`%`8BL`@ynC_x z4N(YS#fbxZHW9J0b38>eXqBj$eA?HJ8tEv6VtWso>i~g0JXe&r@{{0Yoo`x@HCH<- zXZk&qA;h=Ieu90I9g*3q0C{9OY6uYLE9d(%lFG%Wa+LIGE}f+|T)^~jy_|OeOlaHM zhR1Rm*8=OWxqoi#&60;HX3h9pj%)QjjeLSz*x?;vqD($zQYpT9L;_*Q8cl0ViNEi2fhw`L)Oj-TTR-~ zEW#@NR!h6eqww{W+9)iNPnjv3IO^mM*ol8tE_A#MMv>5J^)Q;m#gNd3i)cZEiD}`b z!07z~=AyUb=!s6*YA;uOO-spxYo`5B_;X3me*{b4(Ur|GNB{tSDFA>!lUIN3j`$la z|IB=zYtK5YH6wZVFu#MtZ6RyK82;IZUniNi#h&os^zcFgXNx+J&W*S~+4;VoT4U^k zN~zH#Q|5$D^T7MMf6n{*_;5`}9HkxsLWtkpk)(%aw~kkY)?uXTqT)f)$^hpVGZ7mSUKX{d*fQ7G<3fYg!2tnB&Kd)+(sdOao-6c{`%b<6E&??ICp2#Fbz%+Q*fF#oEtDGrJR-CW@A|s2 zpb0|Z-Sq}{FeE!V4M=I#-_%y=?S^7JkT!k|aDN3NSm7GzPc}|8 z50H3Pc;?jlV}6Hd69(PCkNcd)WS|K^7ylFy7h6<$E=zM%96ZMzZr&A_u)?J=ztmNX z+RVeX@q6J{f#j<|ltK2WCql?$2{tPc3cjQWl+tnj10&whaRNmy zQy5R{VKmSwg=WL=N(9jnbZsnZ&!0wHwQoDx_?*#jO(xgEXMU9qPwzbWFKA1-d&(a* zdRg`)n&F^Ob+>POVqLLOr_1#}<9r$GE$A__r^!3tytAzWz6#&*TpR`MG5)^jG@n6t z+a5d$&wEqD)10*aE6za5rO^$$|GIUJ@rB7?G^sm?FzN*<)R4!)4B{a?to^at4vzL2eb z#WT+<;G><@E6+$8FHx8IXuT`GZt_=0>$tzJM=GbO9jpLzA7_~5 zGfDi{)it0u7`g(;R+Rki!V0=4o-@C_OQ>j{QNh~oc_P?F6fVsd2mo8|`msh@;`(Ju zJTiY$Ewubx7Uzxyt#^8lD`U?pNk;aQ!zh%C^}Wn>y%I@N=ZFb+Na#n8=Y%&iQ<6GV zm>qYUyZ_lYG60nWJW~sG48T%}!UdqQJFt1Tz#0d~)XBl6&pGd)GsJK{I?3Z`a`=bY z+{!b8J|^P39@Nd2Wd1Mx+xCbSI@`FE@T6j(AlUj^R&rcDc3)<4)I z4gU{o{t^9eWc#;h_vk+~2&O9Y#d(n_eSZNeJ*Y$7=oiM){JZ5?vYYP&|KcjvzcALJ zI23D?#1<%)nH}c~Gvoe+nN5FVCRpCDeVIQj2wd?+ugQOS)j%Kai#QSJ+yN0x;vzEM zV%htM{%Gg_D!O!OXy@MeR_m9c6oUK~bLpl90kABMY(7vjn9reQb{_`umA9=S7ea0Q z4B#vho-#6h(HqjtA#!mt0HIKrF%5|<8PDEd$TqJrQaJLCuhu8_?fb9QP(tUYL)giq zG+=iQfKt&r{^?zJXd(MG3w}X~Ktvu^P7Sz~9fVWZ$n~T^M~jB^!oXmTX7$bCp0(uC zYi@$wVK(JPz{=80r8@p3N9=oUagL^0kY*UyT%y;!63HotQNt~v6KC)m5$98#(!QYR zKgjlz8Oko~!9q&_!&f-zg#{#q*6`aPWY2mbbuGfM-A#?jYFdS@bVR_B8c(#ZDaWnG zXqC75cOTw$X%@U<8;2TtP2qC#23e4`-Tp zYFAsQs(?Yui678uVb3pOLr9Sg{1o8+} zJQ5r@+kt7xkZ?s*IIXk+g`DWEWczNtyv@qR1hdvdCn!hh(jr1o!zQ%5hc9R={R3@> zcE8o-gpvkkYw?`q(L)Dj<&J zw_)++ggz4a`-0d_;KWlc9{FwVv4@R!btsKWskA5Ok)Hg}xRKDbxl|t1 zi?l7ryW2aAQBw~a#fCbyB6FTM@{d`wPn8>( zC!Pon*<1z1w(Xg`iRG}B=(qPD3m^EV+I8PQKK}Dmf)gL7NB4p$@;--Pt8P>Yp z@$!HsM@@6i2>JX`xWU(7d6~MR#4@AIN)Z|DfiDKkcXYLgh7u9gl|`BF zPtJ7SaghIQ$Et|tW*@mCUzm*{tT5}>IT=hcwIs%bQ7%qpB2x^PhHDe?sk4r)(}y+x zqd~clEdl-u*ury~nK|c1z4xPTT6QRI&{0` z{fO@d^nY58L#FL^yYuyODqmKB0YCxpf2`oo?=SCQYv=fv5fJ2)08r!-{G*2d-_Ok0 zK8wFLst0xn&1^K`wwP1cOEy=|oP8|=Te3$IvI1Yj#5KI)4b5ublif{-?Z#Ib$w;e* zKL~V52HrpzO-!nuciF!Fa1kUmB7|BC8`Bu-w#-3L%$EX4(BAbmW8?5J1 z)DOsaKbo0p#Cif-9eSl!7z^6mD1 zUuEj8uKbTL1^v%upH3~Q?=oMjnXbS7>gT_#X0mZoHu!F3{AcvDt}+q5%7C(=cFHHb z32)zD@U?Hw`Ix|YUPOei7b8?X>SC15XqoA?y2Few?=(NDJ*`I|{o+lqW{*D&kV@T& zQK%5$=p~zj{3tt9PdkDqmD##A{u>G_15|3EMgKu#C~tI#sSXN;)PB&uWIdiHLm=G? zybr(Zozz1zVRsc17W$GK;ahm)fU5U~y+pl1#9NTTN1czPH>7V2Tq4;2_|{x70<2 zB1$qqTsS1TBE;Dvv>8~zrCoP2Md1QVN3>4epc%xI4cmVH9u=Raf=Az4w1Sn5JhkED zHFiyjVqy}xH6Gh>&QDte zV?=FWXA2Oscht%5_4dYZu*PzK#!r~zmS+Th^!?)TbuFjQ;@Pt4t*gCFgVy)|+ndm( za*yAx-}~;xJ73;~YChig6{gC*l$6%x3OjnPS;q6H>Fgh+)lN&K>N-Tt|HS?|mHTX? z+eInMC4ze-Yp#Ek;_YZKuzeyXE_F{^&&99!w6OQ(TRP?OcK2)lev{8xw7O?q%tw}( zp#8-@|3STK%*1DURj4f*m@J!t2~ZGNf&&jE0B2bJhtBZ@f zO1f}R{md$cg0_WhoTnmvl$z?d@bzq|=i`}cyi{b}WxLB4-&xMyywP$cf0)mqZMloh zWOFM1v+Q3#Kdx=X!hpZ)Bo-Jw*GXnuk?oUdt-bQp$Hb zQ2A}vi>9p`E&r{3d7S5s>;I^yIRQTI^^XM#_J~zH&`nQ15d2(sZ~cm}6+Vwt_&7Ny zZ&BJVIWx8J&&AujXS`SLVXZXzsc*=6a?h*6m#QCGG}w<%zrXAcaD5$?6qza!u>=_4 z|A1H+qc$O_B3X0Zm#f)8pzUGp!g3DFhKYJH9b8U#uQ@GPoj1v1mP4J2q+|Ylg{A5* zihcl(d2+jElN0EuB%`Cz()l=X;nMg2ZWeD|ET`72_M>oKZ_~x-#|v*vUizek|Dn;j zX_Lf{OiA?C_9{!SU6tx>`Y6hH+7veRAi=N?m9w2om_AF2i=F#myW(u4dztp>E>GpP zoZcdR-T#^jI=zX=R%-2vmp<| zgYtv_3%=C8p35^adHcc>N2V+K)H_^1oqKedrssC6`n`9THT~GMeVM1-B%kNanK7az2GDyWRU{Ju^F8#i}{gYh{SLal8FBP&#P zJ9E6evR<-AQ1iq!*Pj2Q=ZH^aTEWq&F#lp$M8`h|rm#lH@9+ISC4QR!@&(IPmrlKT zc<-81F>+$(oKL>6zdpYfYkhi1n_b}rQ06f+Xq7usk(O9c0IE$#OEh4@0)jcgzMymC z_JYohtH~EhkG##=)1`Ry!r@)49k=J1&g^TwIZ0uowbLvIk9c_ z^mS*;Bo-Bx%@m)UId9?Bbw*{o&&Z$37r$O9=6Ta=>$yip_mj`$KRM37f$vH@*qUS%oncg*HP4qKs(Vk65#FlNdvv_dLookcH6(jTOzkV)OqH zpO(KBTb+FVe``s2|6hCU?w={A&Ohwxv)tG6Fnv-zJa;fM=>kg=>_v@sC)6RdGBErCW;vwu>d-Z!p4x@1Q69J>1zgtQIKvCwB=p0S5N2HhP9lI!LOxmv zT|4?dO@#Jsl2GkPyEf5vqwmu~=>8%L)s4Js3tc<p+b_UQURv9eoWELi=q25UYo&gK*6r$@#ZOJ2aGcaU$F)(1X>oK&WG`oduH8Kdq0|1E1gyjGL From 86c2662c7a245e2ef9cfef5a0da363d078fd9ed7 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Mon, 11 Jan 2021 19:40:34 +0000 Subject: [PATCH 05/20] Don't skip binary files where you can't load the binary contents into a string We used to skip these files because they caused us problems elsewhere, but we are now trying to load binary files, so we can't skip them. Instead, just skip checking the file contents for binary files. (We can change this later if needed) --- pepys_import/file/file_processor.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pepys_import/file/file_processor.py b/pepys_import/file/file_processor.py index 3db62dcb1..f49ff320e 100644 --- a/pepys_import/file/file_processor.py +++ b/pepys_import/file/file_processor.py @@ -216,17 +216,17 @@ def process_file(self, file_object, current_path, data_store, processed_ctr, imp # Get the file contents, for the final check try: file_contents = self.get_file_contents(full_path) + + # lastly the contents + tmp_importers = good_importers.copy() + for importer in tmp_importers: + if not importer.can_load_this_file(file_contents): + good_importers.remove(importer) except Exception: # Can't get the file contents - eg. because it's not a proper # unicode text file (This can occur for binary files in the same folders) - # So skip the file - return processed_ctr - - # lastly the contents - tmp_importers = good_importers.copy() - for importer in tmp_importers: - if not importer.can_load_this_file(file_contents): - good_importers.remove(importer) + # So continue to try and process it without checking the file contents + pass # if good importers list is empty, return processed_ctr, # which means the file is not processed From b6a9518f3070c42b0f156389d0191cf561b3339c Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Mon, 11 Jan 2021 19:42:03 +0000 Subject: [PATCH 06/20] Add more tests --- tests/test_load_word_narrative.py | 82 +++++++++++++++++++++++++++ tests/test_word_narrative_importer.py | 21 +++++++ 2 files changed, 103 insertions(+) create mode 100644 tests/test_load_word_narrative.py diff --git a/tests/test_load_word_narrative.py b/tests/test_load_word_narrative.py new file mode 100644 index 000000000..aeba1f6ab --- /dev/null +++ b/tests/test_load_word_narrative.py @@ -0,0 +1,82 @@ +import os +import unittest +from unittest.mock import patch + +from importers.word_narrative_importer import WordNarrativeImporter +from pepys_import.core.store.data_store import DataStore +from pepys_import.file.file_processor import FileProcessor + +FILE_PATH = os.path.dirname(__file__) +DATA_PATH = os.path.join(FILE_PATH, "sample_data/track_files/word/Narrative Example.docx") + + +class TestLoadWordNarrative(unittest.TestCase): + def setUp(self): + self.store = DataStore("", "", "", 0, ":memory:", db_type="sqlite") + self.store.initialise() + + def tearDown(self): + pass + + @patch("pepys_import.core.store.common_db.prompt", return_value="2") + def test_load_word_data(self, patched_prompt): + processor = FileProcessor(archive=False) + processor.register_importer(WordNarrativeImporter()) + + # check states empty + with self.store.session_scope(): + # there must be no states at the beginning + states = self.store.session.query(self.store.db_classes.State).all() + self.assertEqual(len(states), 0) + + # there must be no platforms at the beginning + platforms = self.store.session.query(self.store.db_classes.Platform).all() + self.assertEqual(len(platforms), 0) + + # there must be no datafiles at the beginning + datafiles = self.store.session.query(self.store.db_classes.Datafile).all() + self.assertEqual(len(datafiles), 0) + + # parse the file + processor.process(DATA_PATH, self.store, False) + + # # check data got created + # with self.store.session_scope(): + # # there must be states after the import + # states = self.store.session.query(self.store.db_classes.State).all() + # self.assertEqual(len(states), 746) + + # # there must be platforms after the import + # platforms = self.store.session.query(self.store.db_classes.Platform).all() + # self.assertEqual(len(platforms), 5) + + # # there must be one datafile afterwards + # datafiles = self.store.session.query(self.store.db_classes.Datafile).all() + # self.assertEqual(len(datafiles), 6) + + # # There should be one state with no elevation, which comes from the NaN + # # in the elevation field in the first line of uk_track.rep + # states_with_no_elevation = ( + # self.store.session.query(self.store.db_classes.State) + # .filter(self.store.db_classes.State.elevation.is_(None)) + # .all() + # ) + + # assert len(states_with_no_elevation) == 1 + + # # This state should have a time of + # assert states_with_no_elevation[0].time == datetime.datetime(2018, 5, 7, 5, 0, 0) + + # # there should be 581 points with an elevation of 0m + # # (this proves that zero values are imported properly and not + # # treated as errors) + # elev_zero_states = ( + # self.store.session.query(self.store.db_classes.State) + # .filter(self.store.db_classes.State.elevation == 0) + # .all() + # ) + # assert len(elev_zero_states) == 581 + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_word_narrative_importer.py b/tests/test_word_narrative_importer.py index 81365900a..078be3aee 100644 --- a/tests/test_word_narrative_importer.py +++ b/tests/test_word_narrative_importer.py @@ -86,3 +86,24 @@ def test_datetime_parsing_invalid_sixfig(input, timestamp): output_timestamp, error = imp.parse_datetime(input, four_fig=False) assert error + + +@pytest.mark.parametrize( + "input,timestamp", + [ + pytest.param(["1014", "99", "7", "2020"], datetime(2020, 7, 4, 10, 14), id="invalid day"), + pytest.param(["1014", "4", "99", "20"], datetime(2020, 7, 4, 10, 14), id="invalid month"), + pytest.param(["1014", "4", "7", "-1234"], datetime(1985, 7, 4, 10, 14), id="invalid year"), + pytest.param(["9914", "04", "07", "2020"], datetime(2020, 7, 4, 23, 14), id="invalid hour"), + pytest.param( + ["1099", "04", "07", "2020"], datetime(2020, 7, 4, 23, 14), id="invalid minute" + ), + ], +) +def test_datetime_parsing_invalid_fourfig(input, timestamp): + imp = WordNarrativeImporter() + imp.errors = [] + + output_timestamp, error = imp.parse_datetime(input, four_fig=True) + + assert error From b0aee858e57c721e73a7602661c7d4482c6b77c8 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Mon, 11 Jan 2021 19:42:29 +0000 Subject: [PATCH 07/20] Process message type and text too, and fix bug with regex matching --- importers/word_narrative_importer.py | 49 ++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/importers/word_narrative_importer.py b/importers/word_narrative_importer.py index 355a83add..035fe09c1 100644 --- a/importers/word_narrative_importer.py +++ b/importers/word_narrative_importer.py @@ -39,7 +39,8 @@ def can_load_this_file(self, file_contents): return True def _load_this_file(self, data_store, path, file_object, datafile, change_id): - ext = os.path.splitext(path) + print("Loading file") + _, ext = os.path.splitext(path) if ext.upper() == ".DOCX": header, entries, error = self.load_docx_file(path) elif ext.upper() == ".PDF": @@ -62,11 +63,12 @@ def parse_file(self, header, entries, data_store, change_id): # Loop through each entry in the file for entry in entries: - parts = entry.trim().split(",") + print(f"Entry {entry}") + parts = entry.strip().split(",") correct_length = len(parts) > 5 - has_four_fig_datetime = correct_length and re.match(r"\d{4}", parts[0]) - has_six_fig_datetime = correct_length and re.match(r"\d{6}", parts[0]) + has_four_fig_datetime = correct_length and re.fullmatch(r"\d{4}", parts[0]) + has_six_fig_datetime = correct_length and re.fullmatch(r"\d{6}", parts[0]) has_datetime = has_four_fig_datetime or has_six_fig_datetime @@ -77,7 +79,42 @@ def parse_file(self, header, entries, data_store, change_id): continue # Process rest of entry - pass + entry_platform_name = parts[4].strip() + + if entry_platform_name != header["platform"]: + header_platform_name = header["platform"] + self.errors.append( + { + self.error_type: f"Platform name in entry ('{entry_platform_name}') doesn't match platform name in header ('{header_platform_name}')" + } + ) + continue + + message_type = parts[5].strip() + + if len(message_type) > 20: + # Sometimes there isn't the end comma on the message type field + # which means it gets merged with the text field + # If this field is very long then this is probably what happened + # So we find the first location of a tab, and split on that + index = message_type.find("\t") + if index != -1: + text = message_type[index:].strip() + message_type = message_type[:index].strip() + else: + fulltext = ",".join(parts) + self.errors.append( + { + self.error_type: f"Can't separate message type and text, are fields mangled or a comma missing? {fulltext}" + } + ) + continue + else: + text = ",".join(parts[6:]).strip() + + print(f"Timestamp: {timestamp}") + print(f"message_type: {message_type}") + print(f"text: {text}") else: # Append to previous entry pass @@ -178,7 +215,7 @@ def load_docx_file(self, path): splitted = re.split("[\n\t]+", header_text.strip()) header = {} header["privacy"] = splitted[0].strip() - header["vessel"] = splitted[1].strip() + header["platform"] = splitted[1].strip() header["exercise"] = splitted[4].strip() header["fulltext"] = header_text.strip() except Exception as e: From 2043cb581ff11f8e7f2adfc438b105c15fbfed27 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Mon, 11 Jan 2021 20:23:10 +0000 Subject: [PATCH 08/20] Re-arrange and create empty functions for future functionality --- importers/word_narrative_importer.py | 123 +++++++++++++++++---------- 1 file changed, 80 insertions(+), 43 deletions(-) diff --git a/importers/word_narrative_importer.py b/importers/word_narrative_importer.py index 035fe09c1..3daccc2f8 100644 --- a/importers/word_narrative_importer.py +++ b/importers/word_narrative_importer.py @@ -67,59 +67,96 @@ def parse_file(self, header, entries, data_store, change_id): parts = entry.strip().split(",") correct_length = len(parts) > 5 - has_four_fig_datetime = correct_length and re.fullmatch(r"\d{4}", parts[0]) - has_six_fig_datetime = correct_length and re.fullmatch(r"\d{6}", parts[0]) + has_length_and_four_fig_datetime = correct_length and re.fullmatch(r"\d{4}", parts[0]) + has_length_and_six_fig_datetime = correct_length and re.fullmatch(r"\d{6}", parts[0]) - has_datetime = has_four_fig_datetime or has_six_fig_datetime + is_comma_sep_with_datetime = ( + has_length_and_four_fig_datetime or has_length_and_six_fig_datetime + ) + + if is_comma_sep_with_datetime: + self.process_comma_sep_entry(header, parts, has_length_and_four_fig_datetime) + else: + # The entry isn't comma separated with a datetime at the start + # These entries mostly occur in PDFs not DOCXs - but we check for them + # everywhere. + # Even though it isn't comma separated, it might still have a date at the + # beginning and look like this: + # 120500 Message 1 (NB: the message could still include FCS entries etc) + # Or it could be a date block marker like this: + # 12 Dec 95 + # Or it could be a bit of text that just needs adding on to the previous entry + # So, check for these one at a time + # + # Here we check if it starts with 4 or 6 digits + stripped_entry = entry.strip() + if re.match(r"\d{4}", stripped_entry) or re.match(r"\d{6}", stripped_entry): + pass + + def process_comma_sep_entry(self, header, parts, has_length_and_four_fig_datetime): + # Parse datetime + timestamp, error = self.parse_multipart_datetime( + parts, four_fig=has_length_and_four_fig_datetime + ) + if error: + return + + # Process rest of entry + entry_platform_name = parts[4].strip() - if has_datetime: - # Parse datetime - timestamp, error = self.parse_datetime(parts, four_fig=has_four_fig_datetime) - if error: - continue + if entry_platform_name.upper() != header["platform"].upper(): + header_platform_name = header["platform"] + self.errors.append( + { + self.error_type: f"Platform name in entry ('{entry_platform_name}') doesn't match platform name in header ('{header_platform_name}')" + } + ) + return - # Process rest of entry - entry_platform_name = parts[4].strip() + message_type = parts[5].strip() - if entry_platform_name != header["platform"]: - header_platform_name = header["platform"] + if message_type.upper() == "FCS": + # It's a Fire Control Solution message + self.process_fcs_message(timestamp, entry_platform_name, parts[6:]) + else: + # It's another type of message + if len(message_type) > 20: + # Sometimes there isn't the end comma on the message type field + # which means it gets merged with the text field + # If this field is very long then this is probably what happened + # So we find the first location of a tab, and split on that + index = message_type.find("\t") + if index != -1: + text = message_type[index:].strip() + message_type = message_type[:index].strip() + else: + fulltext = ",".join(parts) self.errors.append( { - self.error_type: f"Platform name in entry ('{entry_platform_name}') doesn't match platform name in header ('{header_platform_name}')" + self.error_type: f"Can't separate message type and text, are fields mangled or a comma missing? {fulltext}" } ) - continue - - message_type = parts[5].strip() - - if len(message_type) > 20: - # Sometimes there isn't the end comma on the message type field - # which means it gets merged with the text field - # If this field is very long then this is probably what happened - # So we find the first location of a tab, and split on that - index = message_type.find("\t") - if index != -1: - text = message_type[index:].strip() - message_type = message_type[:index].strip() - else: - fulltext = ",".join(parts) - self.errors.append( - { - self.error_type: f"Can't separate message type and text, are fields mangled or a comma missing? {fulltext}" - } - ) - continue - else: - text = ",".join(parts[6:]).strip() - - print(f"Timestamp: {timestamp}") - print(f"message_type: {message_type}") - print(f"text: {text}") + return else: - # Append to previous entry - pass + text = ",".join(parts[6:]).strip() + + print(f"Timestamp: {timestamp}") + print(f"message_type: {message_type}") + print(f"text: {text}") + + # TODO: Work out here if we've got a state entry in the comment + # and if so then parse it and store it + + # Store message data here + self.store_comment(timestamp, entry_platform_name, message_type, text) + + def process_fcs_message(self, timestamp, platform_name, fcs_parts): + pass + + def store_comment(self, timestamp, entry_platform_name, message_type, text): + pass - def parse_datetime(self, parts, four_fig): + def parse_multipart_datetime(self, parts, four_fig): day_visible = None # Get the parts separated by commas, as they're always there From cd97df7f4f128793389b57eaef628a1eb3eea6f8 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 09:16:02 +0000 Subject: [PATCH 09/20] Deal with error when trying to load binary files into highlighter --- pepys_import/file/highlighter/highlighter.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pepys_import/file/highlighter/highlighter.py b/pepys_import/file/highlighter/highlighter.py index e3eea2ff9..42997df46 100644 --- a/pepys_import/file/highlighter/highlighter.py +++ b/pepys_import/file/highlighter/highlighter.py @@ -89,8 +89,22 @@ def not_limited_lines(self): """ Return a list of Line objects for each line in the file """ - with open(self.filename, "r") as file: - file_contents = file.read() + try: + with open(self.filename, "r") as file: + file_contents = file.read() + except UnicodeDecodeError: + # If we get a unicode error then it means that the file we're trying to read + # is a binary file, and we can't do highlighting on it, so we return an empty + # list of lines + # Note: This will mean that any importer that tries to process this file + # using the HighlightedFile.lines() method will get nothing, and therefore + # the loop over lines will never execute + # This could potentially cause some files to be skipped incorrectly, + # if they are text files but have unicode errors in them. + print( + f"Warning: trying to process highlighting for a binary file {self.filename} - skipping" + ) + return [] lines_list = file_contents.splitlines() lines = self.create_lines(file_contents, lines_list) From b0c0980d67ed3bc07a02eb629d05564e0f5faf00 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 09:16:30 +0000 Subject: [PATCH 10/20] Fix tests --- tests/test_word_narrative_importer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_word_narrative_importer.py b/tests/test_word_narrative_importer.py index 078be3aee..250324cec 100644 --- a/tests/test_word_narrative_importer.py +++ b/tests/test_word_narrative_importer.py @@ -29,7 +29,7 @@ def test_datetime_parsing_valid_sixfig(input, timestamp): imp = WordNarrativeImporter() imp.errors = [] - output_timestamp, error = imp.parse_datetime(input, four_fig=False) + output_timestamp, error = imp.parse_multipart_datetime(input, four_fig=False) assert not error assert output_timestamp == timestamp @@ -54,7 +54,7 @@ def test_datetime_parsing_valid_fourfig(input, timestamp): imp = WordNarrativeImporter() imp.errors = [] - output_timestamp, error = imp.parse_datetime(input, four_fig=True) + output_timestamp, error = imp.parse_multipart_datetime(input, four_fig=True) assert not error assert output_timestamp == timestamp @@ -83,7 +83,7 @@ def test_datetime_parsing_invalid_sixfig(input, timestamp): imp = WordNarrativeImporter() imp.errors = [] - output_timestamp, error = imp.parse_datetime(input, four_fig=False) + output_timestamp, error = imp.parse_multipart_datetime(input, four_fig=False) assert error @@ -104,6 +104,6 @@ def test_datetime_parsing_invalid_fourfig(input, timestamp): imp = WordNarrativeImporter() imp.errors = [] - output_timestamp, error = imp.parse_datetime(input, four_fig=True) + output_timestamp, error = imp.parse_multipart_datetime(input, four_fig=True) assert error From 07f652ed1e7a39624763d1a840b0597e162e41b0 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 09:16:46 +0000 Subject: [PATCH 11/20] Remove old invalid Word file with docs for a format in it - can't even open it anyway! --- .../track_files/other_data/ASW Data Format2.doc | Bin 31743 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/sample_data/track_files/other_data/ASW Data Format2.doc diff --git a/tests/sample_data/track_files/other_data/ASW Data Format2.doc b/tests/sample_data/track_files/other_data/ASW Data Format2.doc deleted file mode 100644 index 4c1b1bfc7bdc1242431e74091b2a20a8624afb64..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 31743 zcmeHw30zfGyZ&Yr5ELiGoB+oZfdhzB&J(2qPG~qHA}YvHAkmI~SPnTefrKG7i&t@qan##G)v){8k2hgPbo6Rh43?TdX`2WZPmsfnQ*?s2f zsA<2I-9{&Bc4gpHJIzJYu9woZQre*5gNFOp^{=Z}7CN~)dTF86G%ccyosR3a!BVKD zT$s(J@0C6%uNrL9GoM;GGF@I?>i#UXG|?N<3Cw(KDZF3mkNVT7XA@Pw&`Z-Y5!N>? zbbmK33qJPXTOKKoM@cD9BX3Rn5QG<=(6oBEZUy=ZWKiT#c2M#aseY*ZQV;l%H!kT*#hD1TeoZ9&8PFz5$!$xNxsL^U833r1_uY> zh(Q`0%CvP)7TPe%j>1qB2d0)iqMfFvLoxn6-m4IA|c z3<~YkqkAB$x&#K=QU=>n2H&GZt}rDEicp5yQij~4MDAgf4Q(kyg)%ZA7`ca0hS^d! zyw@jkg(-{c*~peM>>g#Khf_ATr9^v+Z5uzFvWYDv+6!e!F|QP}t)Y-Vo7|%;X4_(V z8Vojl8W0SYK8snXc%Lc~5GFwZAPrHpp~&_MG&BzC5oid}!zd`ILl2uEGN@2cq@h;Q zAf2=cNP~qml%xo3OXwPGU21?n7J(l@Ra=)DppS(Txx&FvTvc0_8t#Q|amN%hZ*U>= zT0$45Dkg2QK7+-)9iwO?LkLG|P)KO6LW40lM0cL0O|Y~H>1AW-V6n8Ng+W>b7iuAc z%hDo9S{MqoC}i<_gD3=pqOjedRN)~XEEX5u0S1r?{{YgGI5G@=4x*oJ=a^tIw<2R( zU95l*asXKnCMbiU7?}(oMTU{5$Y6J!tg(tJ+ulNC>?kq}J4FV4bTS5`9>;~q#IuSF zV@{F5mG{W>S+o$Dty=>OHoYNs?g`njsBR&)Uh)YrGz##!*Iy9xy#gCU%qF0-@qUP{ z{XrFGLuIf~!6uE1^=9jUB6PtPI=Fyzw!PWfzle+>7Ap8#r}FUlcV3VG$7REPeo4R` z2nPlMaX>sU1V{iffC<24;3Z%xFdbL`ECdzs+fmOh2;5}d~kPUnRd=DH3jsVAj6Tm6p&doblf4O?$ zOzzFxpYiWzZq7}dzL$Kl>^FLoV#PM4n&#{^w9?=j@JfZDtE#WoyoO$`_<~;Q>9|5q z_3L}JY(pCR4ny?xt2)E1N_E6y~wj=Q0Ssi zKmRKiS8Qeb#}X9c_TUTKJ~a(@UkQWtP~p=E{fbVPoE`?18Na82v%v2_K5z$cg+D6* zUceJTWuPih4e$Yi00R&PFb-M+ZGlK23g`j!1bP9zff!&okO+(f7&j(hJTMiQ4!jD? z1ZD%T0ds-nzzSeBz&QH=$O1kFwgWqWuYg^^*T6Tx9^gA*FR%~z5jX=dMlS=u0eL_H zpdn7}00+Pca0XleSD+-|4wM4Q0_6ZNpc>!8--}ZhZ=B*kOL8xB?;4f2j@`OSju-VY{&E$AKE~;0%RF&!UQn}!8u7(v zjoy^64bT>75A+10fj$7o@(^GskP18pj0K(t#sTAj3}7ZO3wRyi*k24R1=a%Z0~>&i zz(>F);1ggs@HOx~z`5dQ;0SOIxB&bDTmgOqt^#?01LhA$z!l)!;t7-oynyOJ4WJfK z52z0`0671I0*!&DKwF?A@H7wsL;{?jqJbEoFAxt50}_Fez$k$8*BD?N@GsycUd= z3fpMtP5C|qHUe3|CSWJ93)lmE2kZw90zU(}z**ota23b{ZUKJ)j@Vf_0VRRbKpCI{ zP!*^S)CIzTX24TG8=wOa4s-^30MWnzU<8l?qyl4s@jwPJ5tsqI0?Y%J1FL}5zX_yYJ6*aI8}jsquvbHF9w7vL&z3%Ct9mclp(Dgxd>HNX$32^fG7pgGVI=mJCl zJ%HXo43Gc}18&{8a^uRyD;G~4Ie6sMLHyefsQh2a>BJ;Bbx`dHkan*4w(fEI(R#&F z%N5$_KEAKV3-2j`i=Ao$wbo-!T>quLH{d?&Oa7?&`1n6!fma>N zYGv_re|h|7JUd~zP}LEFE>=RcjUF>KcxxCywxp58z-@GrmzZi;-yL5q6E zQR|9yK)9Xbh;UbzcBX!3cW^y=d5t>m;(MD*o`kM$~hd5~a zjJM7u7AZN|a#56$w$obsTr)4x+G+0C)$;Jfn;7l1cx?zcJ_nv@kS_}P6d*xAkJN^u ztONQ@MlFwS+AuB8yaB1=66EZz^#H{naEM3i!ctbzD*le(49#)Y?A#oZB;$mVSQguB zxS`eRYKcDA&7ULHERqE|m(cn6Xr0kts@}J~h7CH7sakw1JM9`$cUMo1`?nzHGFmuf z=z`YNAVt?n*(wdx@!DWeHquIK6}}4AJm)&pEwR-ts?_N+V=A~;T3jX6XIG8Bb$+Wi zDZo4Uy@svsz1^qx$5VOOxja1Q1g|-1{ef6q_tpxG*UWZyz4hPvMG~(^ki^P`}ED zD_=SsxzpKgi_4_tzvi!MUa)S)trK(4`FJf@?Y#YD+m_!9Ju$lZwu~2dPhS3REtfuH zMow8~I-E6NYt+7FPwg>OUJ?FL(CcMOPdya5wcENjgZJ(B{Nb7QheHR|%y@V9r~yA! zpZ|8H<6)hDyy4aDr$z(U&0M+LykUFLw{J&&p7Fl_nf8v!jplY9c6!i~m4TreVJDAg{WD8=Opo6a zFm=;!PVB63_QDwN-5XyickJn1sa?m{I52DS+N`5KIa&1&`2OmD zwErhHhc}>(PXlJM0j9pY^m1gj- zuyv?&!@P?ANqwLF?9wNm-BSnGPwAKU#y4l|_Z2LCv-``=XO`bcj{D_W^r~ap`~^Fn zYx>sccl%!q(W*>2zA9_Q>J9t)t=;7IR)tx2o2=Q=ug|)MeaE#o|M2pj#I@0hV;gMR zQLmxT?G1*B1JP3r|jYs%KtGgU|CnK5yU3p;B~c`{A$6_ATW* z9B>A$Z2mfEs^^wn+S%vZl}IsR;kHn%$O z8C$U9)PO}T{#9_M&bINl_c|OaKPGB&{;)PRH$Po!U$w~34BMwCHmNi<@9FDto!69I zcp>~``Ag3}^+&UMqxUwdGe5TUy7qmYe{~sJy~XjYtIc;F@9+Ory_uU%9lFtY)P!c4 z8@wv^_gZ)9lX{`~J1^{N`H6S0Q6}%|H_s-Y+CR;hklFg!@)ocEGQaVVE5Vudp7MLL zYxuTPwq)aUeP?c4Mmc-HV~`0A?5rzZ7u-S$Gy%OM{&Did4br`c|P&HY|p z7SV3`_1#Cx9yiuH|I?D^Zf^f{d+lj6%1^&?J1zIK%wDtmjN4ZARL#6tQ|K#e4(6X} zvAk}FEq!ktZ+2v@=Uw+vpItaI^4O9uUGpy-JswuIN$oD>+HI>;GSdJ3GFMI;jbBWe zKR?=`%h3x5T)PZ7@Nv2Ghd()0@u!cM^lV|iID6-nMp^Mjd)H4F9BQ#|<+&3PADs)U zSY~|QmOQU&?{0B)8#H>_^M^B!g`GRz@>u4%19>|(ogP+kiu2k{okuSl@zK%UNBZT@ z3eM?Xu0wM8hO6_||K6^j_x_Z#Uq1bH_ugLHBm3GIte7_uBYTAKPGcW)4TD5@ho>=ij+<{$H zhR&EaZ{v#_B0frM(#!4B?C%CYH)8*r32Q3<*tvV!kox^{$}eo&B&z>r=ZUozu3y!9 zeFgh@y^eqT{<~AQml{&@hpmUbE5sVQ-ZZtDYZ_5?+NgF(Q!`JvEqt!r7Qe|u?)-S= zP?aXt7uHH__49=e^Eb|4)H0#wCs9GQDmg|*m+Q19Ye%=VgYG%=D!$=R{^Wa=+)vCe z*>aBii?wfrd2T3kq{8H94!+ks%d5-mi01wC5_(om4mex>yZw`L7ck9<} zcXVq$Co5yrCrjr1GN{ULp>sAo`@u%HC$GK#tlg5)Z97eATXyyE_4R8kXm)#kdYcx} z?XI6Z@9A-{_vKiZmLpf+owjn)VYlvk|FBz^6MFdU>xcW~MRd>J8{D|rtIv9!+S=#h znm3o8^gnWa&JWIcJq)GC<^7zT*=$?=%_jrDetq~8=I{IN`^K(sHUIWa?R}42c6&K( zMW{nSk8hWhj5l~}vCkR*V>6#V$DjA=?{I76wxoiJr(2ngiQioeiuXupmDRChcvSC* z9{w(eCVw>e!(YqC-xzn{gL==d+cmhyj=pa`yP?GC3F|+-@({hkL|Z2Rog=)8J-c7yn-Q7cM58d1cH8 z9ipr53w&qG@(x`qe0OrwNAn_YEU29sZ@2EftdHXMjM@5eU+)rKGjDi1e{;99bFWt6 zW`|4FEBJ1xUjO=wKc6%0?Ner)Y5g7FT^BoTUent7g7dHpkAcVHN{k2|)^py^ntDx29`N>c{`S_~zHGvYq2EoW?5)W~VdvR>a`AJ983>79jhkKV*Ku=tL^W8bVrA7Lj@8T*AgMRo+T4bqoc zn|*<0Thztvi=gt8@?T7MF&cl|>^Zyk(W!ga?su4Ux}ir{%bHy;|KhRE-zDIs*iYXa zw&jhtc9)9_&NU2O{e(-?3$Kq_@cD?3FTdKVn^(lNL(K-SANb;{#zwzXNSNKJZkO6M zyVVUa%ugv*=b1T~d#CrfHDO)jWiz8s`z?E?^{y>@!}FgTzxe7xyKy^eW)Jc-9ndPx zi&|jbaJTUj(-H!!d-W^m+B)^nWT(1IzxwugjfJZ_JAU0~^X4y)K`~L?&b|RQ_Yc}6 zqnXWdpDlA#r@{=1ZXB)D4PZ~3ZDdixq8l+Qp%*sn#dbC}<}b>r=&s1h>BswV()UP3 zc@zx-E04PPvDj9cKM9GbPEL5DfZtT`uOGDM@5A_Yr6u-cKBMDPQimCnoBQFND?gw3 z_{KbC#N=z zZSFTTEp24ez`)cvY+7Se1B@f%lTkOsm@*gzYnDAEi0GZ*VYqVBY{!^oG1dwvJ+5}G5xqj;&acG9;Q znaW{jkDK=Z$EntDne>tuM~&gEVbi8k*|P0bfu^@vsR&33uA@Z%3ZJzGvSZWqn+1ooi zI5|4mJ2;x_X~$cS(LSRadFUM>?SW?{Bk)upRpV2bc=YFk=QKwBiBU4@8#>Q|af=;F zcvei{sk=57G=o5ygsTwgy`;0FJrBM2w)g41HJ)b-!&42?G!hy|CrIq<02+JkxY?0b zkz)}}gQG3lvnQdHz9OVbKa=kE-JQeBTFMr|Mm!prL(D!}sUcMp&=<6#?VK!a@K~wP zVd)Be8S#{hJ;m!!URvKz#+q6iMSOo5eejei7EhFhqL~ju^r{Qq1N?2Ty=NUh&&FF9 z-#oXIVBTI2rEbVAMO@h7`4fZMVOC?Lg-cn&V_GfkH@ZW~tQS5$J)#9%N-TETsBP3< zcp)XIFiAG-PyCa^X5>6mqYVAzqTS(>iTfY+;Zdw`y~@u$Ya;K7i4k*l#)H6VigRA@ zefjy`r_>XVY^+L+K?_11i!mVVr+JOhmt7S4LsYX_jU?6TaS96*VDfcvEAC2`9G|@C z+O5c;o@-{iX!YxCIK;?mTm!GjAZ7i0fT6J%;MDUTz?JJ`fK$?!0G+fK;ADOT-~#X) zzzLiWWy#A0%(%SLOgYf(tK*!D*1iF6=RKFH;?QwHfsq?ChUh7E!) z1Cqa$Xa5o0MS)^fIOlMwQlz zrw-d;F-tx)^e8_JZN&mho_2%yy8XIYQcjsH=g|@P%vV0Kk9xXo4`1G)XxaoVMk`}) zcGt`WxC*yG)iA9Fwr&%!lxJYvX$#Md=B{I_OUGd_LgjT)myOt%atf z=F(=huZx{an+$VF&DUjhhS|>7<$pF`2mRhamx(bkIf*fG-qBh8^P{s8V-jQf=f~uv zXMbUlNwrJQPR%r$EB;+>ECF z`H3+JO=5B=6)6&$^v}0SXW^#ld`cw77)^;WshPz6xy5<2MgROb@BaC0Q<$65l^n<` zp$Yru6{+qgT|yJlFri7DcWNeE#(7(%v+^S+)=4|zTb!SgT=h!{@82&uQFiJfa@zLW zQEcoY{K$>6k=I>VTk=LzY9?&&%5t_7e=tqYK49^^;**|Dxnpuf3-((O=k3HY_RZ3Q z{`oFU*(T1LR-=9QTlknn9((f9dkr$>GKLsUA{Tq7rqVy-f=JJ1nMlagl*?8s21RDE zI@{3`>TJsW#nSh9={t;lQ##s=QZwp##cP&FXED~|ylGKJh>Qf$n>J?8j6v#44J9U6 zLOv`JI*C%%dqed8E2a%oZhnT@-jthRHsxN`ukY&TJL+s_%C*XDu~+5P%!DSifzm8J zoBAn>Q3ho(T9f5GV=Sa*Rul`WGLey2v?Dzw{?0Iy-+lV_PtD|fAmA8|&Z3slS+oz! zBT|hfj!It9dW^_!b7{+Nb7@OjBswc1l~n!nc}~rY^JX1l$5|+1a>&JKQr7AwzJe{% zv+tU6F{*EyauGF@uOL197K$>=JVQlOE~X7u(WIz!W}^%V5|(?4I-q1&mB3Ic{V)giqMYvEK_p%^H1J;Qv*lv_fCrEnBfR{(0< z)K$8xE29*EY7U}Eh8cNXf4#k(tzu)vR+`atH#*BuSSfmis!|v#OdnAxDy@7ZLbFEZ zR`C&yLEp-lV4fVF>BcdmOgc!Ef@m;1<`_z4J?o{2_q*xYx*yT(c6zplC@%hzfD=zK z4>lMYWTCpS41kghFy^)_k_58&nIXv%;bT=VPSh(U z4&(H~p(1BB^+4v-%vY>b0jY++Q($B_uF*$G^ha@A86B2Wj zEINxZD|t?=)J6s&<>z#!WS0RZ8dGy#bFpCw07%~&rJu}ra24xtdB zcTzLm=o>YWum$V&Nyb4bcQ-XtbzmKE5`~;oWnHw+ph`U|OO-kEiaHdVbm`erN+XCm zq8{sLAkN@48{?P4sov8?Jy!}z;gn)bvD#s`!&2EFum#KMH7HO?T}ipDGgMU|wQ9|g zD{H^DT2o$*E^1A!L?_O<9F>$=W@82;bzl%u8?NA#OdO}|BB3FpRq~`GA8YRxt-Yx= zIZ#V#A$n3~_DE`}V+|qu0j3hz1&J|bM0>HEw6oH&rER~KVfrL>lCYH?guB`sh?Wd* zB?Tjbn#+zzv=)DgZ|VPngeI=6XYX7ErM{TXCAlJsZqln*+-L&5y-Wen%8{jPh4RFh zO7w;DGPzrAMVX{ldX{T?TSb^<=t5^1u-3JdTxcE2K|N%MO1~^61?@@-Q?BSFKBolC z-A#Smy?o z%Q!1Y&BQvu36f{V1l9qgsUpWFwWpP(j(O2stjfHKAgk_{cq=0#K;{WbE*lui&GE%+ z>;&SxOVAFq1T|+lmP+WYuDF2E;-nHUu#M<4C_nv9t=JmU z^v|zIePzCtUA_2?3nzC%)KcPzizT&FeOs(uncCRuYP(!o!M9pA%c-Nas#vfW9~hUdhWQo^(BY@}4xS@*K=cY1b7`c}15k-}{`c7sI~mBpJ5 z5wT@&!^mFQ~oN;gw^b%P~R&I$TFzEz3H9AYS=+u!EMJ4VFiE$4B*I2fcxMgQv z%u91K;QmO@hBnd$c^4IgQcE`>G_fGE9F9`X=qi5aT|`Wdqs*s_8mykBXhw+>rW`Z8 zQR3)f>=eZ0II#uu96^UIvt@e=N4-NQB_u_@Ycye}tZy8hB%Y-9kJL=GczyMMGY*RODjb8uMvlZ+&eN&BZ#*D;o(nT183_yRwZ`io(rhC@w{!(9$i{ z5u@;$^=hD57SjN>5Vjn-q!d@P8=2Es%5EvXaF?9)ruvgIF#=^`Ioy{RW6!em>`RuQ ze@>Le_Jbx7Q&VbLGD*dGN3NMtY8oD#JjE?C_fi&FGTBPn;4z@lq)!5rnDVJ!Ewg2) z1cm5Ey*TWxda#_OwbfVkO#7%IKPJ^%W}i$T?m0YKg0=?3Jq^HK1g&Jrv)u-1>M^rc{X;(HE0tIeMNttErw-1a+jf z#aiTR_3$Xmq(wVZ18PVKxduUx(n_bIx~i=YRwF!!7X7eP=+7E(b`fuiZ+W(AYW1va z|2W;#E3za}Mvi0MQVViwE_^8;|ewjT;ufKkhwWWf^t3qd$ecYs&Qy z?N}y}DPFy=UYPES>jl~CNGbPEwp!7*tl^nUz1ksCFL^45Q;u=<5LUT2u`IM$i^iIyxc&>z6Ez5oyU_*nL$WoAtk-Ug$IvJeuFoGoMqk?j$E zC0ly&l_{6A4Yw)Jbfdo1LB<`+xclYKRh~ezj#fBdq)(1^?uDgK<~d3kAGAW8H!aDO zV}Lw3`*S4ImgLAX+KOX`Jk-vWtr#Di^;Dldq354I!-k2l13@U@;k@K;n{r)gMhO*_ z>a~iiW&V)I*FLnInzO`qlvCnCbfm-?X38s8qE%Q%Nm*79o#n=d+ui1J7L!!wFXb=G zvv^1dPue`T&M-^dqEsGRm#~ctj)yz?^J&HaMz=ijm9t`P4Fw8Ch(fZ6h?{6^nVJ~R zbW2Q5hM75azs447GLyg%_s}3zT5fEV0XZqbEf0T8jo`<|08gixQVE8?{>&4P<(aZf z2CWPVHI=KLEUv01J6TyWxZ&WilVwBpl(u_b%A8?lILQ>P?<`AmJHQ2u1B0Q-p+YXg z(;9Nho|KF2$%m6A^W?}yM>b;Q$TGH(rwg2T*j^SmMWOgeJ6T?&PHFkS^*j9Ie*gU4 z2j`V|eImo`@GfrAHNJfDZ+iUM`#<&v_8iCzsO&lC4ZQ#3e`_7z|Je`lwJN@|G7#W< zK0^V%=Q9%E`#5|Rt0FJ~VB4tx-`kl5@Vyj?40r3AY4R2(w+qB0rdfYpaBp71On{efRx|kgaDyHLm&*`GXvJ&1)5s&ye1wWRtxYW zStCB`^wHYuf9Na)AHe>V!%K6rQ`{Is&aR9me6ysV@%Phgj&%p*KgN3sWfLsr2Ae;i zX4CYMv!yh9y?@3n<#-fAK8sTSd0H=MpMpOq)(&YLK9c1}#Oe>eJqoWU&_BnJvcaR! zunqs)Odd}7Y{&IoMZP<^=H>ZIpA>QD9q|3V)RYx;Nke;Cid zQvXl-XY4+n6KyT{pOv?R_xZ6twJth$`ye)~-?#FsR-Yz~8sP7Vg!;5I#yvLzeK&?63d; From fb700c391aaaa6450cc3a80619bd709c3b4cb86a Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 09:56:35 +0000 Subject: [PATCH 12/20] Remove fairly pointless tests, that now cause problems with file processing etc --- tests/test_import_cli.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tests/test_import_cli.py b/tests/test_import_cli.py index 2896f4aa4..c6fa6e66e 100644 --- a/tests/test_import_cli.py +++ b/tests/test_import_cli.py @@ -18,6 +18,7 @@ FILE_PATH = os.path.dirname(__file__) DATA_PATH = os.path.join(FILE_PATH, "sample_data/track_files/other_data") +EMPTY_FOLDER = os.path.join(FILE_PATH, "sample_data/track_files/empty_folder") REP_WITH_ERRORS_PATH = os.path.join( FILE_PATH, "sample_data/track_files/rep_data/uk_track_failing_enh_validation.rep" ) @@ -192,18 +193,6 @@ def test_import_with_wrong_type_db_field(self, patched_print): assert "ERROR: SQL error when communicating with database" in output -@patch("pepys_import.cli.DefaultResolver") -def test_process_resolver_specification_default(patched_default_resolver): - process(resolver="default") - patched_default_resolver.assert_called_once() - - -@patch("pepys_import.cli.CommandLineResolver") -def test_process_resolver_specification_cli(patched_cl_resolver): - process(resolver="command-line") - patched_cl_resolver.assert_called_once() - - @patch("pepys_import.cli.custom_print_formatted_text", side_effect=side_effect) @patch("pepys_import.cli.CommandLineResolver") @patch("pepys_import.cli.DefaultResolver") From 9353cdb3d52db038337cd4cabc87a61c3df21f79 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 09:57:11 +0000 Subject: [PATCH 13/20] Add outline test for files with no hidden text --- tests/test_load_word_narrative.py | 68 +++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/tests/test_load_word_narrative.py b/tests/test_load_word_narrative.py index aeba1f6ab..9a7a06088 100644 --- a/tests/test_load_word_narrative.py +++ b/tests/test_load_word_narrative.py @@ -7,7 +7,10 @@ from pepys_import.file.file_processor import FileProcessor FILE_PATH = os.path.dirname(__file__) -DATA_PATH = os.path.join(FILE_PATH, "sample_data/track_files/word/Narrative Example.docx") +FULL_NARRATIVE_PATH = os.path.join(FILE_PATH, "sample_data/track_files/word/Narrative Example.docx") +NO_HIDDEN_TEXT_PATH = os.path.join( + FILE_PATH, "sample_data/track_files/word/NarrativeExample_NoHiddenText.docx" +) class TestLoadWordNarrative(unittest.TestCase): @@ -19,7 +22,7 @@ def tearDown(self): pass @patch("pepys_import.core.store.common_db.prompt", return_value="2") - def test_load_word_data(self, patched_prompt): + def test_load_word_data_full_narrative(self, patched_prompt): processor = FileProcessor(archive=False) processor.register_importer(WordNarrativeImporter()) @@ -38,7 +41,66 @@ def test_load_word_data(self, patched_prompt): self.assertEqual(len(datafiles), 0) # parse the file - processor.process(DATA_PATH, self.store, False) + processor.process(FULL_NARRATIVE_PATH, self.store, False) + + # # check data got created + # with self.store.session_scope(): + # # there must be states after the import + # states = self.store.session.query(self.store.db_classes.State).all() + # self.assertEqual(len(states), 746) + + # # there must be platforms after the import + # platforms = self.store.session.query(self.store.db_classes.Platform).all() + # self.assertEqual(len(platforms), 5) + + # # there must be one datafile afterwards + # datafiles = self.store.session.query(self.store.db_classes.Datafile).all() + # self.assertEqual(len(datafiles), 6) + + # # There should be one state with no elevation, which comes from the NaN + # # in the elevation field in the first line of uk_track.rep + # states_with_no_elevation = ( + # self.store.session.query(self.store.db_classes.State) + # .filter(self.store.db_classes.State.elevation.is_(None)) + # .all() + # ) + + # assert len(states_with_no_elevation) == 1 + + # # This state should have a time of + # assert states_with_no_elevation[0].time == datetime.datetime(2018, 5, 7, 5, 0, 0) + + # # there should be 581 points with an elevation of 0m + # # (this proves that zero values are imported properly and not + # # treated as errors) + # elev_zero_states = ( + # self.store.session.query(self.store.db_classes.State) + # .filter(self.store.db_classes.State.elevation == 0) + # .all() + # ) + # assert len(elev_zero_states) == 581 + + @patch("pepys_import.core.store.common_db.prompt", return_value="2") + def test_load_word_data_no_hidden_text(self, patched_prompt): + processor = FileProcessor(archive=False) + processor.register_importer(WordNarrativeImporter()) + + # check states empty + with self.store.session_scope(): + # there must be no states at the beginning + states = self.store.session.query(self.store.db_classes.State).all() + self.assertEqual(len(states), 0) + + # there must be no platforms at the beginning + platforms = self.store.session.query(self.store.db_classes.Platform).all() + self.assertEqual(len(platforms), 0) + + # there must be no datafiles at the beginning + datafiles = self.store.session.query(self.store.db_classes.Datafile).all() + self.assertEqual(len(datafiles), 0) + + # parse the file + processor.process(NO_HIDDEN_TEXT_PATH, self.store, False) # # check data got created # with self.store.session_scope(): From 30e4dfa000e70cfc35e5b59866dafa442b04aa63 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 10:38:10 +0000 Subject: [PATCH 14/20] More work: process date block entries, parse single part time stamps etc --- importers/word_narrative_importer.py | 102 +++++++++++++++++++++++---- 1 file changed, 89 insertions(+), 13 deletions(-) diff --git a/importers/word_narrative_importer.py b/importers/word_narrative_importer.py index 3daccc2f8..0bcb380fb 100644 --- a/importers/word_narrative_importer.py +++ b/importers/word_narrative_importer.py @@ -39,7 +39,6 @@ def can_load_this_file(self, file_contents): return True def _load_this_file(self, data_store, path, file_object, datafile, change_id): - print("Loading file") _, ext = os.path.splitext(path) if ext.upper() == ".DOCX": header, entries, error = self.load_docx_file(path) @@ -56,15 +55,21 @@ def _load_this_file(self, data_store, path, file_object, datafile, change_id): self.parse_file(header, entries, data_store, change_id) def parse_file(self, header, entries, data_store, change_id): + platform_from_header = header.get("platform", None) platform = self.get_cached_platform( - data_store, platform_name=header["platform"], change_id=change_id + data_store, platform_name=platform_from_header, change_id=change_id ) print(platform) # Loop through each entry in the file for entry in entries: - print(f"Entry {entry}") - parts = entry.strip().split(",") + stripped_entry = entry.strip() + print(f"Entry {stripped_entry}") + if stripped_entry == "": + # Skip blank entries + continue + + parts = stripped_entry.split(",") correct_length = len(parts) > 5 has_length_and_four_fig_datetime = correct_length and re.fullmatch(r"\d{4}", parts[0]) @@ -88,10 +93,82 @@ def parse_file(self, header, entries, data_store, change_id): # Or it could be a bit of text that just needs adding on to the previous entry # So, check for these one at a time # - # Here we check if it starts with 4 or 6 digits - stripped_entry = entry.strip() - if re.match(r"\d{4}", stripped_entry) or re.match(r"\d{6}", stripped_entry): - pass + # Here we check if it starts with 4 or 6 digits, followed by whitespace + if re.match(r"\d{4}\w", stripped_entry) or re.match(r"\d{6}\w", stripped_entry): + # If so, we process the entry + self.process_non_comma_entry(header, stripped_entry) + else: + # Try parsing the line as a date in the formats + # dd MMM yy + # dd MMM yyyy + # For example, "12 DEC 1995" + formats = ["%d %b %y", "%d %b %Y"] + timestamp = None + for date_format in formats: + try: + timestamp = datetime.strptime(stripped_entry, date_format) + except ValueError: + continue + + if timestamp is not None: + # We've got a valid timestamp + # So store the details ready for use with any lines that follow it + self.last_day = timestamp.day + self.last_month = timestamp.month + self.last_year = timestamp.year + continue + + # If we've got here, then we just have some text that needs appending to the previous entry + # TODO: Append entry + + def process_non_comma_entry(self, header, stripped_entry): + print(f"Found non comma entry: {stripped_entry}") + split_by_whitespace = stripped_entry.split() + timestamp_str = split_by_whitespace[0].trim() + + try: + timestamp = self.parse_singlepart_datetime(timestamp_str) + except Exception as e: + self.errors.append( + {self.error_type: f"Error parsing timestamp {timestamp_str}, error was {str(e)}"} + ) + return + + print(timestamp) + + def parse_singlepart_datetime(self, timestamp_str): + if self.last_day is None or self.last_month is None or self.last_year is None: + raise ValueError("No previous day/month/year block") + + if len(timestamp_str) == 6: + day = int(timestamp_str[0:2]) + hour = int(timestamp_str[2:4]) + mins = int(timestamp_str[4:6]) + + if day < self.last_day: + # Day has gone down, so month must go up + + # However, if month is 12 then it must go to 1 and year must go up + if self.last_month == 12: + month = 1 + year = self.last_year + 1 + else: + month = self.last_month + 1 + year = self.last_year + else: + month = self.last_month + year = self.last_year + + timestamp = datetime(year, month, day, hour, mins) + return timestamp + elif len(timestamp_str) == 4: + hour = int(timestamp_str[0:2]) + mins = int(timestamp_str[2:4]) + + timestamp = datetime(self.last_year, self.last_month, self.last_day, hour, mins) + return timestamp + else: + raise ValueError("Timestamp must be 4 digits (HHMM) or 6 digits (DDHHMM)") def process_comma_sep_entry(self, header, parts, has_length_and_four_fig_datetime): # Parse datetime @@ -255,11 +332,10 @@ def load_docx_file(self, path): header["platform"] = splitted[1].strip() header["exercise"] = splitted[4].strip() header["fulltext"] = header_text.strip() - except Exception as e: - self.errors.append( - {self.error_type: f'Cannot extract header\nError from parsing was "{str(e)}"'} - ) - return None, None, True + except Exception: + # Couldn't extract header, so presumably doesn't have a header + # That's ok - we just create an empty dict + header = {} try: # Get each paragraph entry, after accepting any tracked changes From 4ccc0bcb80a4716e4efe4307c9f4715b62837b8c Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 10:39:21 +0000 Subject: [PATCH 15/20] Add more timestamp tests --- tests/test_word_narrative_importer.py | 76 +++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/tests/test_word_narrative_importer.py b/tests/test_word_narrative_importer.py index 250324cec..c3b2b5f68 100644 --- a/tests/test_word_narrative_importer.py +++ b/tests/test_word_narrative_importer.py @@ -5,6 +5,74 @@ from importers.word_narrative_importer import WordNarrativeImporter +@pytest.mark.parametrize( + "input, last_day, last_month, last_year, timestamp", + [ + pytest.param( + "141030", + 14, + 7, + 2019, + datetime(2019, 7, 14, 10, 30), + id="valid timestamp with days matching", + ), + pytest.param( + "151030", + 14, + 7, + 2019, + datetime(2019, 7, 15, 10, 30), + id="valid timestamp with day one more", + ), + pytest.param( + "011030", 30, 7, 2019, datetime(2019, 8, 1, 10, 30), id="end of month rollover" + ), + pytest.param( + "011030", 28, 12, 2019, datetime(2020, 1, 1, 10, 30), id="end of year rollover" + ), + pytest.param("1030", 28, 12, 2019, datetime(2019, 12, 28, 10, 30), id="four digit"), + ], +) +def test_singlepart_datetime_parsing_valid(input, last_day, last_month, last_year, timestamp): + imp = WordNarrativeImporter() + imp.errors = [] + + imp.last_day = last_day + imp.last_month = last_month + imp.last_year = last_year + + output_timestamp = imp.parse_singlepart_datetime(input) + + assert output_timestamp == timestamp + + +@pytest.mark.parametrize( + "input, last_day, last_month, last_year", + [ + pytest.param("141030", None, 7, 2019, id="missing last_day"), + pytest.param("151030", 14, None, 2019, id="missing last_month"), + pytest.param("011030", 30, 7, None, id="missing last_year"), + pytest.param("991030", 28, 12, 2019, id="invalid day"), + pytest.param("019930", 28, 12, 2019, id="invalid hour"), + pytest.param("011099", 28, 12, 2019, id="invalid min"), + pytest.param("9930", 28, 12, 2019, id="four digit invalid hour"), + pytest.param("1099", 28, 12, 2019, id="four digit invalid min"), + pytest.param("", 28, 12, 2019, id="empty"), + pytest.param("123456789", 28, 12, 2019, id="too long"), + ], +) +def test_singlepart_datetime_parsing_invalid(input, last_day, last_month, last_year): + imp = WordNarrativeImporter() + imp.errors = [] + + imp.last_day = last_day + imp.last_month = last_month + imp.last_year = last_year + + with pytest.raises(ValueError): + _ = imp.parse_singlepart_datetime(input) + + @pytest.mark.parametrize( "input,timestamp", [ @@ -25,7 +93,7 @@ ), ], ) -def test_datetime_parsing_valid_sixfig(input, timestamp): +def test_multipart_datetime_parsing_valid_sixfig(input, timestamp): imp = WordNarrativeImporter() imp.errors = [] @@ -50,7 +118,7 @@ def test_datetime_parsing_valid_sixfig(input, timestamp): ), ], ) -def test_datetime_parsing_valid_fourfig(input, timestamp): +def test_multipart_datetime_parsing_valid_fourfig(input, timestamp): imp = WordNarrativeImporter() imp.errors = [] @@ -79,7 +147,7 @@ def test_datetime_parsing_valid_fourfig(input, timestamp): ), ], ) -def test_datetime_parsing_invalid_sixfig(input, timestamp): +def test_multipart_datetime_parsing_invalid_sixfig(input, timestamp): imp = WordNarrativeImporter() imp.errors = [] @@ -100,7 +168,7 @@ def test_datetime_parsing_invalid_sixfig(input, timestamp): ), ], ) -def test_datetime_parsing_invalid_fourfig(input, timestamp): +def test_multipart_datetime_parsing_invalid_fourfig(input, timestamp): imp = WordNarrativeImporter() imp.errors = [] From 99f4e356c0d445206d4118c90cbcf2955354195e Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 11:17:10 +0000 Subject: [PATCH 16/20] Add more sample Word files --- .../word/FCS_extra_narrativetypes 2.doc | Bin 0 -> 56320 bytes .../word/NarrativeExample_NoHiddenText.docx | Bin 0 -> 12201 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/sample_data/track_files/word/FCS_extra_narrativetypes 2.doc create mode 100644 tests/sample_data/track_files/word/NarrativeExample_NoHiddenText.docx diff --git a/tests/sample_data/track_files/word/FCS_extra_narrativetypes 2.doc b/tests/sample_data/track_files/word/FCS_extra_narrativetypes 2.doc new file mode 100644 index 0000000000000000000000000000000000000000..86b67841ce202c75cad0e94b533deda2374530c3 GIT binary patch literal 56320 zcmeHQ3zSvmb>4Rvm^;X07;z*>;Hm@=VVw5^2@b=61M(6Od>}Hz3}FOj2#f}eO^F(7 zQ)5YDV`?dVEZSzZU0F7DwOL(V39Bo0b+xIXMlD)PNosAhrX8(qVyaBP@0>X~``&rn z{~Ycf2iCo;Z|*tgo^!tQKfe9%fA9ZZPJM6M+28q-nLm(>*Yz?^&h;0|1a&-t?Y+T% ziAVvqdF)(&e}8ZooB7uTR0AIjJS*F<5REtmK?+yFEI%YkNK1<(Sl1Xckz0@cs z!nuj%7;qY;d12xJEbT($vCU>kEFKzhjuGYrF zUSNc$;+S-9&O7~Ydt*~EMuTk_jn?BC_G84l-y6B6f^Y%|C&&(<2ZNn|O(MbZZ16Qa z%4>>nO%bjsQrAoy<{FNQye5fjlDH=6U9;4Cwks1H56d#@Cxub14WmFCMj-w@fKe!g z5wXV`9dH4*&&^3lbAqE}5yr?)oYjGGs@og0Cc_iB^hBRbKXF(l%uS$mHmoTIdoikY zmRFU2HO|6l-CV-^aMUiw*x4zEy>X}=TpivhR05Bpv?G;B;EwPvPCSM)Q*vF&^2DY@ z3fqMkk-OxO*Qy2ZJPJ|5@zUfyUwfhnXB1&vXBm2N9!o&~r3`mF>^Pa9s0x>I518^Ru>=8K*ZIypo4&%D6rn z>#0)Mhce+t#eKX%@Jbx%?-du>L~-w%(l2i6K+?L0U3<JK?*Q%^}-beS8X6P)D`~sx@*Qks=UfHTU zSI;k6x9bl`;TINzlRUeclGup@h4Ro*zNa7M5x(Y+la#kXmE~WtYoN3u_NBvKn9<); zJhA_SVNVY}i2Ni+f-CXf-(RZs{LLgrRQ5*NfHqjWo4v1TGw`;S3sV}Yv}DX0rJ=*3^DlI+`y}e zf41J)Qat&2&wG)_uej`)KmR@-b=cQ%e%oj9TO1Wae`1QPXzSV6*4^MA_0JgiTU=NsRVTwk96LvP_4iNp>Y4@O zHhmTN9Nu>3b&q-n9$+!>3f}#_q5txn=|g{%cJwIDV=H_I?`*kKcz-~hu^n}B7RS!_ z#R)>3+bl(wY{!Mi(S;bfx)JwbRPDoltsN)w;}82GE<*n6(Z1}5qx~w7mq(j2&zG-( zd<~4L8mL6nsRCJ(3jAAy*i@DJTZ`>hcxju3bC{KwF>XfQV?B;;z`t!cx&rU(@NXrK zZo%<&nDHFKKhDKE@p&curE>4C>Z&%wPO}Devx$ zI97+P6mmHHmqN~HEj&ubgs$<9E(UwGcjYGJj%o%VTh(Mjh0kWnFt)ww@4_cjP*#O{;^-pVxuk3#t$a4(edE`T_gCYaOga zRxwmr8`{W^(;V`2maY)$bsZE#^@Y8zE&^LnRDGP1aH)TMD`TGU4PPv`3|R>NBAHqTUUZa5m!I2tnAJU1MTX&j9P z95qmOSXbOj*I+KnQIn=~?pbhXo!1PSBjlzWTIckJqn4uzRz2Hnrv{$2Zjnr-XD^8x zi`F`4G&W_?YOjG0XTzfPUIX>puxQ2C!27ge(VDNp*l)qYnR?jAxM#zn$9MWHQCYI9 zh4BDglFacvyINd0wC*_rv?zyGeJyN&1&7vrZ4Ax3)Rk6zZ4AY`aA>{P#?ZS9hgN%S z47Iy(Xsy@A(0Xyx6d^|!(BbuTLZxua(F$X)?HY`wJ%Ng(fx?*imXQs zg|?*%#f3xL6=oAG>PlOc${1F~r5xI(RK~C=E*#pT zRK~C8_?;(4?YJjqJIimXxE*x6tRVF%bfUZ@an?rtnv_ss1^t+DKyY)sQVGXfSITKCm4bYJVB`^GfgSI5x33k$t> z?ajx@1i12O?Z?RmxbkTA$H@q|@@W0X$qKmgXbZqBLDU}Y%A;*SoXmhLkG2AFas%0| z0DH9_|KsEaxH2qTY>GVE7R1R8xbkR!KTdwYjfW))N3}Q^0#_bw4dUboTzRxLVAde2 zdR%$5HHecdaOKg~z$RZ%iCLJo1#vP4^-k*H9AEdg=pKP9kG2MJG6$|a+8V^k9=P&o zYY-=c;Ksu_f6zXR`bCcbxblS7fIQKo0IoctH6TxPYv3{_aILYfwdga01}F6}0-$-K zdj_sNp*3KwMUM%%@`ToaJkhOzD^F+*$P;~z;K~zP1M)$qcyh=oN(ZP)~Gg;G$B!hL9{awt!KBP@^^( z0xD8l0(uD+c1^D%WQ|4l3f$U7TY;(=K0zi6TXyg_z!)FUEfd`*$mF4&ABsm?fvOlj zK_(AdcPJih1>)okGI@rpH*E#tWDPQThORfgqLANQIe#lKbiFY`5cY#OnS;#o3|((} zT_MX8-7CoC8Lr;6Er^pn$mAKW-n3T`Cx4L1GhDrCdk`mskjXP#y=i-piyXpm^``BC zO&)>1lePwNG6|WrHk8d_d_Zd_`uxC!h3lq5mNQ%OY+tK5LZe`KhxA}$vXa9QK zkCQ2ID~r~CoIHUWi`IUeEP)G)UM0xw^Pd3><-_Q8f@Gl=s$+K9R}Q(ACDcAy&fohV zx-6mg$wCj*qAa2I$zl@+(CY=sVx#>A6$K#6`9}eUZkJH|WU(3d8cQk)rgLM#PMl!e#nEQ)X2aNFX8yjP% z4x(jK2G>EHexQHZXe{)DXqleDbr7c?@EaLps~`-?;kl#|7;Z`P(CTdAZSoUkCm#yN&iZ8136(=Ud*fH1aCMqbC7 z-fcYR#x&14P8Qu|-lSLEbEanFIX9+x&T%s6Ze#lRYO(7%gYJSZJRtVZ@9q7PDru=vi|v zGUtQm=41&YW-LpTS#y-0thJwutocx7(QEAay-^$Ob2b&kAefnjZB^MF1k$*RV zR!m!hT;$G&ZY6CAa*;V7nnhcJT;$D%X3>@)7g_V6S+pgv$(ghDwI#60nUh6Zf;iv4 z%kFD%q|{!3P0pMw+7j60%*moHflbbw99sJ}IdigT?c3zc$)dGylQSoa*1k>7oGe=V zHaT;$XzknN%*mp)Z<8};Ki1l}$(fTyYu_ekP8O|wo18gWwDxUs=48>@x5=54MQh(C zXHFKaeVd#)S+w?Ta^_^w+PBG>lSOOaCTGskCDcBDdBbJ~px4fm#bySe*UpnA7c+p& zb|wIAnK^4wUo_H9=8vn*QsHmm!|qP1@m=O>HSzD=B; z@i47@n>fG!J>Ag4*u?qCqP1@m=O>HSKKCrFB_sQ~#%#v@a9tm*zD=B;HKw(1v$~%w zTKhI}ezIuo+r;_FqP1@m=jR$Vt$prU^kiDHuqSiRhDB?iHE6@4wQsYYpLf;Tw^`3m z7Cr9U#QDjhwQm#WCyUm;O`M-BTKhI}ezIuo+r;_Vr?mEM;{0UM+P8`GlSOOaW<5Vy zwDxV{{AAJEw~6zUMQh(C&QBJteVaHxS+w?T;{0UM+P8`GlSOOaCeBYwueEOz=O>HS zzRh}mvS{twtmh|-*1pYpezIuo+pOm&i`IT_*7J}2JxFqDOJEcACyTZOHc@}FXiH!d z^(TwA1XfZ1aFrk9aaK|PaFw6W!Wn>G+pL$BvKCt#u(b$(H^?>| z&G!GB(X*r_;C+1-&H#KC|8K5XXbD0V&i*Y}XbD0V&i*Y}XbD0VM*S^VsQr+IZEe9q z?T0Lk`dhG2`}x-eSOZCsHODW&=gvQJE@~hlX#aNL?qZSdi6RFt5jhK-nI!TmB>l=1 zk<(K}mX?V8Hc&WCWDoFNprTad3E-!|_De-h0&}N}JPN!8Y`RS3o4^P9N&ippyr*}5 zqIdpB@BFvkd0X%Nm)`lY-uVx`^M>B}cfIp(dgovD&JXm?zv!KR);s^CcV5;z|EPCf z)H~nTJO7|}zNdGvoiN4>w$JP`k%xgdfsL1o{24H7hR6}%HK6q)B7Y2&%oOPbP6JD? zKz{%VXQA%^r+~VTiW~z9W}}Y)&jHPI^6k$y9joon_PHPUD)2tAGbQpJV8N9lj{$wa zmaCv+VAj>>6ToXg>s<5+pyXrd6ToR;={)oapm09=1aJzdTaa)6vE2S_pJPD5HP9^Z z98j|m{Q-C%XunqEZ-J^sB2NJYi_x~g^T3jFv^S8vPUHY^8fdBz`5I7CDRLM%1LW=h z*wsJVXH}KR*MYKXk(OU`eEW~x_GkMPEQR)g=YX1zqs;(WhGW1fpy3AS97r}n=fG*8X*qNb zlr%%z-HM0EttREBC>TWj%~x|x8n2L&@X@~x1(PGuK-PVpkDx!wxeGFr-5a6qF(?L zcc5PY`Su^X?a%gk30Qg;bPFVRiaY@P5NK_KUjR&R#~cAT3#{4&8xNFrz{Ue-fR;|! z5TIl?YzXiw(EJJX6=2GqeEY-vk6HWA_UQ#)0h+p?Wnj|X&@yluShg2h1}5%X#752mTY-@~FsP0I6RVc?5V1*z|eC z1cBMVg7^UNCb01_#0-F0Ul4f^_z|%F@qGJ_W&N{#o&je5Dq_39>%iKhBHsXJd=c>= z;5A^)mqeZh%DxQ$5cna``ULz#VEV7YKLpMKtG)vN5GZ{T{vmJ%$m@UX>Ywe?@>Tfn zK*_H|%fOq!#;2fVVAgLy%fOF-^~caxfSJFEz5=`stUZ45wJ-I0fxHrU&htS@h0J;S z!SdA8&o006>F&a-fUA#o7tRH$Hi}dpy|tj?fE*F2#@>P|nNo3qR80Oc-D+SvPMZM) zou~xc1;86H+QS{k$NASqQUjZ$2mg8{h1G@Kct0qZyhV7G$$04>c(D&nC}!`$T)_J) ztNge0zvHmmG|4}fzzW75*)IpMp0QmJaESOSMQZ}du$%$6Yx&Qq=Nvz%pFiC%E%X_U_)@&E4nw z1NWZinRBM6`>pOiQ`KGdcF9XYL1O`60q_6-fCRAqaoSoF0stU}0RS)o@Q_+UcDBwY zw$6IW9`+_qIt=bM*2MYHkW{$z;8WtLZ4VGlSO$1`_pGkNCg#$I7Dkw3YP0N7S(}(spWKQ1oQ?sii6iY$$kmC7H?^n z7QWH@8sog#Oa!h846}c*W^M1lk!FVgk~Q?kk0~L_Wfo)xyi~sdMAIb2sFHg|u@DA6 zykgn7?pT9LS);J0mTQ2+L($E}KuV}vVDqsRC&e~ia+*;$OOg)M|AZB`@4Yvhljxly zAL|GTa3VO3#>M7Kl~;>~J9H*;W>uga8XzrlGboLy_Ey&RXsP)~Ckw*gxKJsi_;j~S zFU$xLH@=ddU#XtAn*B@#R}=-&&MmS9B0!~%+rR~I?aBtel;*1z{W02(`7wVbEETtz zThwY5A^HwRPpV#Rhu?u7wjV3|ZOK|X764IEUMazY!NAAT%NS!?cvC-+7gh@}QOi`DO zM4eOIWR(};9SHNcy1iQW?e2i2p!Py0_j&uT$Qe6VrjH|J64WCB5gISUCvaioZbDPX zx-hqCgs--Xkt|55CuWVv>kG0SR2HsN-?QYVG0n|eifehnmGOvt4klOqj5Dl2#64}F+V?EzEbb2rqC?0-2xGkWHr2+^ zeA*4)!Qo>+A<_7xw`qr$sUvU>-kAQ=KIV<<6)wOpg9&Us_yBkaH#Kc^1uG1A?GAbhNcExT@{t$(W-Unqntud{5b~AKA&gyY zc2ni9k?J~$N@6@Two?&=uBpNuWNp))0n9lnVP216g8I$v!ibRgr^A_yN{HN9g z&GWGxfHmfj2LM0^E5mPE^H+U2%vg;EQDF_9sO@v9f6l2T;L=lNQ2|jE=sLf2S%xD{ zT~vLc&Og9%e}VKwS~*jduoQu^`34*4JN}N3e?0E-Y88_qQ>@Qjc6j84tlc%QzbU*6o&FZRM71G%uLzR)zg&XO2Qhn&0;mr90FiF0%1khySJ-r{|lB8Rf3 zb`h((tb22pMOf(Jic+v&tV*&ccFjr1nJ8ik|F^dGq5k^W3$rZbq2X&Jy7&T582mS8 zv|o6nF_i-)iUrk<{oN?CWOe}LQNR&$l{a8LNZ~L}W+>Syg}ZQqs~J< zM6`(0{t~SnBP+xIC|OZf!Z?*sfC70f%lrAz{H$VRy43LYCc#^y<#XpP+D(1eQF`N{ z@D}b-cOF`A_K|{8WxGk4zD;eOu)PQE1~3Kt^A5JduATBbJt|n?FygL$uk3(?tR=4= zIn)5}#ltxU_D&)XomU?|@=5y-jXlD-6ffZJS!BqGZn$_T^?zkZ<*1_}O|&Nmp}ZKv8$OBJ?&9D&xKtc7*fVggQgYzX*5W|#JT*{iNqdA7#uQwkQQSpm;zWIbm0D=cV3Gw{pEy=np`^d7)X4wLpUVj{ zSx330*rtUasJiXgc}pmCKTt4StYe-4rzW`c2DQqsPKHn$YP&uj?m*_#QpGdnNHw?p z^QCVl8C7FP3|YZ7F3j9g45HyG^j0n#;Si-{*jqMINbHzU!lL~!N!x0nBWMR!F> zlyoyI^yc5xBMrjER`^`4Y$POTG&oashk=AHe~eGE>|l_^#KqfnU3ef*PA=`d68_{g z%Jq80ii8Hq*vV1ou_;jtF|wGAez3QZ{%=<(jUv|Kf=m?Q>zw7li>IiHwe%?jLKKUQw9CosYFmrV zZmt0!I=fEb2t^x~a!!!Xg{H5z&o*JOX6&-#kF{Prq3JDOv#=k5cnefEp;<~5J&g4 zf|GT4t;it*NwCiKP$*0Ho9Py*OhQuC1pHVbvUKLWQF7Q$I&0|e^p1S{@Ef7rQXPtN$vc_5$t}}m=zJ`y0{g+(>McrlI|IjqAxg+wP@^Vc-{t+9k3Py z$5%Y9p)%wt#+E|IqyuL|ycw|Q;byAi$BnPLW~b`p?uoHmYn8TSjMlui1Q9*k#4a=jNPb(D<40LVB#& zP)yRL#qh=sA=WfIj)U$54#Gdg`w(HY^h;y7AcgH%p3;+FjLQazfO_%^XJ6M%>Pc01 zZGR)a6koYk6h*T|n>N}mR(`*|s&wrf$24ZMKnJp-tt#HK2oPpik;Z5NMsA!f;`5s5 zHL;}NUL;aCTCi#n^)AqaS$~5~PvzOg9@k!lVjJpDK*YAyl!u%AXLa)iAH@^&mXlK%0}jeWA8C?9SXxrU8t1 zF$lf}(VC5^0y0dE@}r_Mmc{C5-;%XLOs5DOmYcdwXUmWbRn+#^ z<|Axe-BC$9QRntECG$#qs$WI>&(RLEVt$`eacyDDdPhlcEzp1BDKLgAnUu^&<>6AT zXnDKpS$c4Px!+Z-de^Zm1PqV-T9#AA6G(LLT%#h_ve{>mH`XQ)K}8pyR_2 zUfr*|iH9Y(As`luFtER~+Zl_+=iOhuXN*M2u2B{~O?#~qv<5gAOboO$TU*aa@;!Qy zDB3QpY|nijt?qGR_x|~4l|)6|%3cX7cZ0^H{|m@Mr&u62oWN9ECk)pKKudx%4NL-k zAZf9)>_n>&P5%~%;e#A(z;VTi%XE%AJQLkx_Ayp4Ofn-@S@|Wk{q-*CHwq6>ESCZ+ zvXc!9EQV5vfPDj5gQNAK*%DyoU3@VS{ns#sUANF4q+~C&I&Kkd#cYIh>>ptv7kGG4 zzE!C#kU@H4am}2wahJ4;7x65%d1CQWl2>ok}ZLAT$0KzVf7uGI2uam8x z4$&OfxP(J^Uaf^DV=posmuRLryKxz)Z;)WH*wc?9p=?f9kNXm>4NY50AWM(>3Ck0Lh9obn z4Za-bk;>{4j+Dc&X7+^DgkBOYRA^b;@ir|LkijE4uJptg2k1!r*oRl&pT?`$?*EnL z?B)8|nNa~NglIWkr+jw|V%LiC6SPe2yLtDZI9vUUHL5h8u$L5<>EhJipcwNc*646b zY3N8G{Khqf?$M}GTysvCXg1S43)-W4oJMz8OL~`}5aI0D9bd7IZ4>-RH z8f;XyJ=K8saeSe6PKEE5g!@UXsNjY(yFK&^<>uF83ghg)O`V02b+S=lWJ(hwi4RP7 zsbX^a;1T1VXwfQMMU*;mvXnop)%gR=09nfYyG@h{rjl2@Vs&raa!;B*7=}*2v~Gtw zEMlVNcWbUQY87<^ox2jAiO`x)3}U9Bv(fQ=_%KKB8CzltT5J(^SP{XfB0_L-o%@y+ z@in*J46S6dn9}09bUj-dx^{C4mZ;V^Ak&7U$ zXbMQXNjofic2v_F{0grzH7mEL0=d0Szz&ZSl(s&!B-%mm+SuGgF`j(USX&!`rttUq`ewGx>x-c)ZuXF z@B1rxvKc4~ELPYZ6BorqKuE}1_c&@!AMKY67+5WYI)jGoKWUFs@^Wv7`4SA&`r=X4 zc7Cu+vd@$*w#R%rI`+Gt?YKq#r@%N$nMian(*h2cd z8u%Z*0uC7>;}zrrfLKB6G7qR9PvrHds34L{mJ*~Q$G_anH@ImLe3Ai0{WzfqrhX^% zZfn|Ii5az+*{h{%hcfRi9FoTh9D)*{*fLDWyYBzeDU@9+3{(_{z$xp_wRrE7q?jaM zx+x+Bv+M}WCPPS&Ps*!opy^(mq)+UF9w*6cPHrWiZ}1Fwna5QJfgiF-&(Mt_O4t2j zQVj|{D>f2EykS8ENgqoHf&$*k9!vP{kzjDrk!_?mTg;k4s3J$6|=YH+76ra9tI!H zWjZE>KL@nNr{lck-Or5j%S)#{V5kd-LCI_*9vEL8t9n-xa<}02v_BRa3}vS)O4tyc zCnLw?)&_B|%`NfTw3?1GMlJ$pWD6Mm06Z&jM_;2%;6Uh|FzuLnz){bBb;I|=RQeD? zM{tWW-S-~}a|X25t0l0S5`lv_j6VW6Q}8*KvVo!XZ+f~CuVV)y#tJ^6JmMoh#xvS2 zNK+|Z79*2McU5rV`=D=KfK$UkZu|6zvN)VClC-aTVb~bB$&(qJ;*i#az+dZRuKbuJ8WezuAWd=AYW zBlqtOoeP%B{CYLESp9M0#X{<7Qj_JbGP3iEdMxz-N;^H(=wdRx1M-RgSg&|OjRNb6 zA9Ab#8Q!;Wt2shBnhQjIc;}IbosUG#N_mSU&}9Q%gsv62sFAgY=dMz46#e`Dbx58{X(j!jzMgBIYxZBdpxnfe~YF6r@c4E@p4a zJBW+4uAd8UJp;}!5A@3N(L7(4m;*&-wobMfIHBl%g=1FI@BkMa~RfkyS>2aNz6u4h=*MZRUV3gyz))N;Jg36` zIdlsq83f{J+zhvcmdc>ZS`sVi;mshZV{~k}6T5qHWZ~#XlKT`#Nfa^7rodr|yvim@ z@VxZJ3UU!raCs;(md0N$|v#o;4sw9+i6AGCNWC zbeE$1c{@cg5hU2&7EyrxU;G4NFp-Dq&w@%?cyG)%Qpt&g>T9#Qo}rIKXy3hb(C20_0lr+M&0S~tb_d(%{o30YEAPGX3w*F z&t-7d`Csk4M_$#@eHndVsMedWKJ-nYp^f;y9r&^q?YdP5Hyg1dtGvDf4eAlWOvFPK ze*R|j%fr--gm&UMXqOZ{AaBX=hu-5ROG0tLWA8TC>mnGu6NzLVhV`wbal2hxsY5MP zqw{%TCpkE}b)1~EE`tuEx##iYakc6)yN&}qn>RL<4%_yR)H!PocGkO+7X-eu!MxxP-Oh6hNgTyP12gFga<1)``%37J z`vxQvZ!^s%wD;qA3?=eC z0=qI(B35R|NQ^jlm9o$2cEAKM3XRA>fpwfL{J>5oPkgO=GMd|Zym&m8DpWfCNE%ZZ zG4?EVgwtH7MR?qH2%f^l2`yCE_-jQ+(s|T$5>t&wE2xM)JzRC^=aDYMyVtPMrph|X zGohYkMd&W-GGzNrmfnh&T0g&}i_Y4yRr+?WMkp?0om6pNwGy?p#g5=&F{R|w{Afpu7(Q2ym?VYc9YV^!W63WUNBiwvYRf9SL`?q zjOJi)Qi)<)IJ;MkmrZ`o9yhB%YgAq4ch1>QF}p6%Vn68+a*Gd(6m2@;qQ+#-*AmcP zU0)NvwzM6Qo?&9`a#BgAp)i^kgM9vxAonCO)5W3#ZDuFzZ3Jx!IWjoK9%2Hot?; zdOVJ@td7_Us=#Y3OJIzpTMGC7#?9DTZhs-i3c7lZ&5>A(Q)<}Ib5QeIkv)$x0`sQIk<2=4xk&2Irr>tw-xqq)$XVa8$9vyp&}A@ zaFM+NBdfpmofcqjZwJTM`|bJ778Apn643QbiSvFW#++qQ??EFqjUa*b=t@;h?nArX z>!~c}-5ewpf~Iwj&DqDIDyt6r5kx{ZN0io_=H(&gr}u}Mwhy7>9)mI8Ge10p%BBtB zMN3M&m)u?XCgv=w_#T>5s~ZO*rBa`pL96>F)*6PIWT|YBrYq)qJN_xzq-YTQCk3wx ztOxi+1p)V11rit=f!% z%1rYyHwqEmyg(3oxx}?&v9|f-+$6v zwZyhn#*Sg3i9`iM08lt^CKF9)_F5x0aQ5vfT;gRuTRwtqo> z7EP4V2#x~4JCgl$m$JeBpw3k?H zYwqs)2T`_YOmTusLbKU3`wR=Z7=D~TZU;r~33wSgc7;;*y+{tt)bu3)MH?joSH$0x0Pa%Jq_5nL6su^*CfF=`Zr{tgPD!J+>t z?k6Y~jhwA35G<=(QAjys_KIpGc$O+C%YKFQ3qqgFkSsC7J1#$nxp+(hb^3RKU6u%z z4YT-a^ey?|3Cx{iwvK$K%)PiUJngwtFb{ex0hT(1G3erO;M2tq7inPQJ1njrjtW95&%2XVc1*h4xll?$dIG(le z^Pil5Nh65wE`ea*cqn7IxtMHIc6G9ZhU?!PH#1)0#?8f4;g&Vrh@J21nlxc1lAV_% zYXPb_OWD8r#ynw&p^C6YS9QiS;5`--v+<52hQuNgM`E#g1eEyj(nz6kF^r?2u~7C% zkP@SN5~#MI`86tmh+w(zl}N2|vy}PEGE6?ioq(@%?2l^7@0RW(s^``c2tyFL?1TlM z7Uj~@9800t>*QO=r3<;fIpvE1Ci1%%orECl7)pjxUUvl}vRA?jzCn!5(Pi+H0cv~M zfm18Q1=Od#ni!Cx;=KNfcd`$xh|L2rWW+{pYD z5d-j+vln`Vp|6Cx23&O2CQ`7zmtOs*b|6iiouabUESNc=d z9yoFPKa!Vzh5wq<@FyJV#qaQ6GaP;u@#~4upCaJF+{$kw`0M%5ukgQf$A5wW00L|P z;6Iq;U*W%UdVdP&!~MPYzcGBj;(w)r{=|>r|BnBK82VMhuVllY5-JJ*nE$`2hhM>e lUoiiK0swDl{xbXDmrr>q7_fT$%=SeBbc2`JX}X`K{{wCO Date: Tue, 12 Jan 2021 11:47:47 +0000 Subject: [PATCH 17/20] Implement actual storing of comments --- importers/word_narrative_importer.py | 38 +++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/importers/word_narrative_importer.py b/importers/word_narrative_importer.py index 0bcb380fb..fa762c825 100644 --- a/importers/word_narrative_importer.py +++ b/importers/word_narrative_importer.py @@ -39,6 +39,12 @@ def can_load_this_file(self, file_contents): return True def _load_this_file(self, data_store, path, file_object, datafile, change_id): + # Store datafile, data_store and change_id in the object so we don't have + # to keep passing them around + self.datafile = datafile + self.data_store = data_store + self.change_id = change_id + _, ext = os.path.splitext(path) if ext.upper() == ".DOCX": header, entries, error = self.load_docx_file(path) @@ -52,14 +58,14 @@ def _load_this_file(self, data_store, path, file_object, datafile, change_id): # Stop parsing if there was an error during loading that we can't recover from return - self.parse_file(header, entries, data_store, change_id) + self.parse_file(header, entries) - def parse_file(self, header, entries, data_store, change_id): + def parse_file(self, header, entries): platform_from_header = header.get("platform", None) - platform = self.get_cached_platform( - data_store, platform_name=platform_from_header, change_id=change_id + self.platform = self.get_cached_platform( + self.data_store, platform_name=platform_from_header, change_id=self.change_id ) - print(platform) + print(self.platform) # Loop through each entry in the file for entry in entries: @@ -124,7 +130,7 @@ def parse_file(self, header, entries, data_store, change_id): def process_non_comma_entry(self, header, stripped_entry): print(f"Found non comma entry: {stripped_entry}") split_by_whitespace = stripped_entry.split() - timestamp_str = split_by_whitespace[0].trim() + timestamp_str = split_by_whitespace[0].strip() try: timestamp = self.parse_singlepart_datetime(timestamp_str) @@ -134,7 +140,9 @@ def process_non_comma_entry(self, header, stripped_entry): ) return - print(timestamp) + message_text = stripped_entry.replace(timestamp_str, "") + + self.store_comment(timestamp, None, message_text) def parse_singlepart_datetime(self, timestamp_str): if self.last_day is None or self.last_month is None or self.last_year is None: @@ -230,8 +238,20 @@ def process_comma_sep_entry(self, header, parts, has_length_and_four_fig_datetim def process_fcs_message(self, timestamp, platform_name, fcs_parts): pass - def store_comment(self, timestamp, entry_platform_name, message_type, text): - pass + def store_comment(self, timestamp, message_type, text): + if message_type is None: + comment_type = self.data_store.add_to_comment_types("General Comment", self.change_id) + else: + comment_type = self.data_store.add_to_comment_types(message_type, self.change_id) + + self.last_comment = self.datafile.create_comment( + data_store=self.data_store, + platform=self.platform, + timestamp=timestamp, + comment=text, + comment_type=comment_type, + parser_name=self.short_name, + ) def parse_multipart_datetime(self, parts, four_fig): day_visible = None From bd50a275699028590a600328276bee581d89a083 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 11:48:09 +0000 Subject: [PATCH 18/20] Test storing of comments for files with no hidden text --- tests/test_load_word_narrative.py | 57 +++++++++++++++---------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/tests/test_load_word_narrative.py b/tests/test_load_word_narrative.py index 9a7a06088..d9400457b 100644 --- a/tests/test_load_word_narrative.py +++ b/tests/test_load_word_narrative.py @@ -1,5 +1,6 @@ import os import unittest +from datetime import datetime from unittest.mock import patch from importers.word_narrative_importer import WordNarrativeImporter @@ -43,42 +44,38 @@ def test_load_word_data_full_narrative(self, patched_prompt): # parse the file processor.process(FULL_NARRATIVE_PATH, self.store, False) - # # check data got created - # with self.store.session_scope(): - # # there must be states after the import - # states = self.store.session.query(self.store.db_classes.State).all() - # self.assertEqual(len(states), 746) + # check data got created + with self.store.session_scope(): + # there must be no states after the import + states = self.store.session.query(self.store.db_classes.State).all() + self.assertEqual(len(states), 0) - # # there must be platforms after the import - # platforms = self.store.session.query(self.store.db_classes.Platform).all() - # self.assertEqual(len(platforms), 5) + # there must be 1 platform after the import + platforms = self.store.session.query(self.store.db_classes.Platform).all() + self.assertEqual(len(platforms), 1) - # # there must be one datafile afterwards - # datafiles = self.store.session.query(self.store.db_classes.Datafile).all() - # self.assertEqual(len(datafiles), 6) + # there must be one datafile afterwards + datafiles = self.store.session.query(self.store.db_classes.Datafile).all() + self.assertEqual(len(datafiles), 1) - # # There should be one state with no elevation, which comes from the NaN - # # in the elevation field in the first line of uk_track.rep - # states_with_no_elevation = ( - # self.store.session.query(self.store.db_classes.State) - # .filter(self.store.db_classes.State.elevation.is_(None)) - # .all() - # ) + # there must be 25 comments afterwards + comments = self.store.session.query(self.store.db_classes.Comment).all() + self.assertEqual(len(comments), 25) - # assert len(states_with_no_elevation) == 1 + # There should be 15 Comment entries with the text 'Message 1' + comments_with_message_1 = ( + self.store.session.query(self.store.db_classes.Comment) + .filter(self.store.db_classes.Comment.content == "Message 1") + .all() + ) - # # This state should have a time of - # assert states_with_no_elevation[0].time == datetime.datetime(2018, 5, 7, 5, 0, 0) + assert len(comments_with_message_1) == 25 - # # there should be 581 points with an elevation of 0m - # # (this proves that zero values are imported properly and not - # # treated as errors) - # elev_zero_states = ( - # self.store.session.query(self.store.db_classes.State) - # .filter(self.store.db_classes.State.elevation == 0) - # .all() - # ) - # assert len(elev_zero_states) == 581 + # The first one should have a timestamp of 1995-12-12 05:00 + assert comments_with_message_1[0].timestamp == datetime(1995, 12, 12, 5, 0) + + # The last one should have a timestamp of 1995-12-13 05:17 + assert comments_with_message_1[-1].timestamp == datetime(1995, 12, 13, 5, 17) @patch("pepys_import.core.store.common_db.prompt", return_value="2") def test_load_word_data_no_hidden_text(self, patched_prompt): From 7aadbe058afaefc56966a6eb635369baff6566c0 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 12:11:05 +0000 Subject: [PATCH 19/20] Strip message text, and use right params for func call --- importers/word_narrative_importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/importers/word_narrative_importer.py b/importers/word_narrative_importer.py index fa762c825..7895232db 100644 --- a/importers/word_narrative_importer.py +++ b/importers/word_narrative_importer.py @@ -140,7 +140,7 @@ def process_non_comma_entry(self, header, stripped_entry): ) return - message_text = stripped_entry.replace(timestamp_str, "") + message_text = stripped_entry.replace(timestamp_str, "").strip() self.store_comment(timestamp, None, message_text) @@ -233,7 +233,7 @@ def process_comma_sep_entry(self, header, parts, has_length_and_four_fig_datetim # and if so then parse it and store it # Store message data here - self.store_comment(timestamp, entry_platform_name, message_type, text) + self.store_comment(timestamp, message_type, text) def process_fcs_message(self, timestamp, platform_name, fcs_parts): pass From 26aff92e37744cd4844e389b40c123a70a944e3f Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Tue, 12 Jan 2021 12:11:24 +0000 Subject: [PATCH 20/20] Oops - move asserts to correct test --- tests/test_load_word_narrative.py | 106 ++++++++++++++---------------- 1 file changed, 51 insertions(+), 55 deletions(-) diff --git a/tests/test_load_word_narrative.py b/tests/test_load_word_narrative.py index d9400457b..a2d4a51a7 100644 --- a/tests/test_load_word_narrative.py +++ b/tests/test_load_word_narrative.py @@ -44,38 +44,38 @@ def test_load_word_data_full_narrative(self, patched_prompt): # parse the file processor.process(FULL_NARRATIVE_PATH, self.store, False) - # check data got created - with self.store.session_scope(): - # there must be no states after the import - states = self.store.session.query(self.store.db_classes.State).all() - self.assertEqual(len(states), 0) + # # check data got created + # with self.store.session_scope(): + # # there must be no states after the import + # states = self.store.session.query(self.store.db_classes.State).all() + # self.assertEqual(len(states), 0) - # there must be 1 platform after the import - platforms = self.store.session.query(self.store.db_classes.Platform).all() - self.assertEqual(len(platforms), 1) + # # there must be 1 platform after the import + # platforms = self.store.session.query(self.store.db_classes.Platform).all() + # self.assertEqual(len(platforms), 1) - # there must be one datafile afterwards - datafiles = self.store.session.query(self.store.db_classes.Datafile).all() - self.assertEqual(len(datafiles), 1) + # # there must be one datafile afterwards + # datafiles = self.store.session.query(self.store.db_classes.Datafile).all() + # self.assertEqual(len(datafiles), 1) - # there must be 25 comments afterwards - comments = self.store.session.query(self.store.db_classes.Comment).all() - self.assertEqual(len(comments), 25) + # # there must be 25 comments afterwards + # comments = self.store.session.query(self.store.db_classes.Comment).all() + # self.assertEqual(len(comments), 25) - # There should be 15 Comment entries with the text 'Message 1' - comments_with_message_1 = ( - self.store.session.query(self.store.db_classes.Comment) - .filter(self.store.db_classes.Comment.content == "Message 1") - .all() - ) + # # There should be 15 Comment entries with the text 'Message 1' + # comments_with_message_1 = ( + # self.store.session.query(self.store.db_classes.Comment) + # .filter(self.store.db_classes.Comment.content == "Message 1") + # .all() + # ) - assert len(comments_with_message_1) == 25 + # assert len(comments_with_message_1) == 25 - # The first one should have a timestamp of 1995-12-12 05:00 - assert comments_with_message_1[0].timestamp == datetime(1995, 12, 12, 5, 0) + # # The first one should have a timestamp of 1995-12-12 05:00 + # assert comments_with_message_1[0].timestamp == datetime(1995, 12, 12, 5, 0) - # The last one should have a timestamp of 1995-12-13 05:17 - assert comments_with_message_1[-1].timestamp == datetime(1995, 12, 13, 5, 17) + # # The last one should have a timestamp of 1995-12-13 05:17 + # assert comments_with_message_1[-1].timestamp == datetime(1995, 12, 13, 5, 17) @patch("pepys_import.core.store.common_db.prompt", return_value="2") def test_load_word_data_no_hidden_text(self, patched_prompt): @@ -99,42 +99,38 @@ def test_load_word_data_no_hidden_text(self, patched_prompt): # parse the file processor.process(NO_HIDDEN_TEXT_PATH, self.store, False) - # # check data got created - # with self.store.session_scope(): - # # there must be states after the import - # states = self.store.session.query(self.store.db_classes.State).all() - # self.assertEqual(len(states), 746) + # check data got created + with self.store.session_scope(): + # there must be no states after the import + states = self.store.session.query(self.store.db_classes.State).all() + self.assertEqual(len(states), 0) - # # there must be platforms after the import - # platforms = self.store.session.query(self.store.db_classes.Platform).all() - # self.assertEqual(len(platforms), 5) + # there must be 1 platform after the import + platforms = self.store.session.query(self.store.db_classes.Platform).all() + self.assertEqual(len(platforms), 1) - # # there must be one datafile afterwards - # datafiles = self.store.session.query(self.store.db_classes.Datafile).all() - # self.assertEqual(len(datafiles), 6) + # there must be one datafile afterwards + datafiles = self.store.session.query(self.store.db_classes.Datafile).all() + self.assertEqual(len(datafiles), 1) - # # There should be one state with no elevation, which comes from the NaN - # # in the elevation field in the first line of uk_track.rep - # states_with_no_elevation = ( - # self.store.session.query(self.store.db_classes.State) - # .filter(self.store.db_classes.State.elevation.is_(None)) - # .all() - # ) + # there must be 25 comments afterwards + comments = self.store.session.query(self.store.db_classes.Comment).all() + self.assertEqual(len(comments), 25) - # assert len(states_with_no_elevation) == 1 + # There should be 15 Comment entries with the text 'Message 1' + comments_with_message_1 = ( + self.store.session.query(self.store.db_classes.Comment) + .filter(self.store.db_classes.Comment.content == "Message 1") + .all() + ) - # # This state should have a time of - # assert states_with_no_elevation[0].time == datetime.datetime(2018, 5, 7, 5, 0, 0) + assert len(comments_with_message_1) == 15 - # # there should be 581 points with an elevation of 0m - # # (this proves that zero values are imported properly and not - # # treated as errors) - # elev_zero_states = ( - # self.store.session.query(self.store.db_classes.State) - # .filter(self.store.db_classes.State.elevation == 0) - # .all() - # ) - # assert len(elev_zero_states) == 581 + # The first one should have a timestamp of 1995-12-12 05:00 + assert comments_with_message_1[0].time == datetime(1995, 12, 12, 5, 0) + + # The last one should have a timestamp of 1995-12-13 05:17 + assert comments_with_message_1[-1].time == datetime(1995, 12, 13, 5, 17) if __name__ == "__main__":