From 05b87442e332da0b384e324bbe32dd14fdec7390 Mon Sep 17 00:00:00 2001 From: Jilong Kou Date: Tue, 20 Aug 2024 15:35:28 +0800 Subject: [PATCH] Add basic merge testcase & crash recovery Signed-off-by: Jilong Kou --- conanfile.py | 2 +- docs/imgs/Child_Node_Merge_1.png | Bin 19791 -> 16571 bytes src/include/homestore/btree/btree.hpp | 2 +- .../homestore/btree/detail/btree_internal.hpp | 4 +- .../btree/detail/btree_mutate_impl.ipp | 4 +- .../homestore/btree/detail/btree_node.hpp | 13 + .../homestore/btree/detail/btree_node_mgr.ipp | 2 +- .../btree/detail/btree_remove_impl.ipp | 3 +- .../homestore/btree/detail/simple_node.hpp | 3 +- src/include/homestore/btree/mem_btree.hpp | 4 +- src/include/homestore/index/index_table.hpp | 108 ++++-- src/lib/common/crash_simulator.hpp | 3 +- src/lib/device/virtual_dev.cpp | 9 + src/lib/homestore.cpp | 2 - src/lib/index/README.md | 11 +- src/lib/index/index_cp.cpp | 15 +- src/lib/index/wb_cache.cpp | 153 +++++--- .../test_common/homestore_test_common.hpp | 7 +- src/tests/test_index_crash_recovery.cpp | 356 +++++++++++++----- 19 files changed, 509 insertions(+), 192 deletions(-) diff --git a/conanfile.py b/conanfile.py index 524cd6a1d..f34a966c7 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "6.4.62" + version = "6.4.63" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/docs/imgs/Child_Node_Merge_1.png b/docs/imgs/Child_Node_Merge_1.png index d9329bd0850adff157af0e70923882089f6afcb9..cbc5c5d094b493fecd144d699f8637f43132bf9f 100644 GIT binary patch literal 16571 zcmeHu2UJsAw{AG7C?Djy@@DN6qH`1_uh#V#fVZ>q=Zlm2#9p)H44%b z2t`5Se7+twDP zsPvPLfq5o8&(kk7;Z54)6n0@@@oh@FqRK5{QE3}{m&&SIJp<#`U+O5ICgs0KuBB@f9rs%4rfy*96IZW51@*fR&Fvgq zeT*&b3yaI6qT|hNTuiJSVA;8_?Dx^Julz$I?me`+dDn1#VZpLsdoFG zqQ+g7Tl%+jAKJV6tP)AFiEo3$qA+6Z zx#`KmRMOZ`{M$FKe!iwV8`=95_~zyojkdSB^>br$Ynw`4T_x`A?R{Ta!_MP(cWH_$ 
zN;bCl<`%!tEG#eLR~D8C)SX?@`o_|C;wCWz)fGLv_-w6f;gQZd1_lXp(8y2}2{}E32f}so7qEPoBpm7Zg=yzze}o zVk@c}$0uhRTH12qzLjO6%PS

RN8zhxsMKJdUvTgS>!WA+BxAE-ZgvUF%quwgX(8 z&i1a>P0)*@e7Av{6W+H?{eT4lJNT!&6A<+lXk_reqpQh)Kf%t(&vOMi{t*OH{Bq}} z+JoTn)v1>eJWkr(?K7%l3PD!07Vp>n|trwcPJBT-5eW&(|8<&)GnpRDJdA z;-k_f3cWH3(<^o3)d%r+OCc>8Umi(ZI|AtSpXaYigoCkN6cN$A&!k-{a1jI=Jk#@J zR{i(sZ1Kc2?P3*^D{;pnNJ`-oqru<4#vKEFo)P{T?|$Xsd{KP+_OaXw#^wjt9c_IF z_u47ck~55aph&O?L#DiAY}kzs0r9Vw$%8MT$64N|c3k!cc;8D1d~=t*<7=>-q7fXE zqfEIEM6vg9Q@n=VLus~2;gajl6%W_#JRGcD`7^}NGNLqyd?bPIk9#Zz(gsGX5e+iC z>?LKnNwZJY1?Ior0D*`fAL2FUzm(Xb(%2AQ1-s$3Wn~P^<|b1&ucIKuHp1|8BDAAv~S+CRh!81^x|HP>)2jt3|Cov znrlWX5P{F}7D1F(N;s-$&Xok&R_ zW?M94u~TA3Me~MxBXIc3@);r50qbj?AKj#X%2;u&Gc_AsNnnURv5Tm_A;Yy>6H~~{ zd23|<1PD~#;Rz<@4qZf^XheI}^>ORFg80#Q=r#?!8;OF@+JO8p@b-KBe%sh6hm)}Cwsv%f<>!`c z*-`BE$$K@?TWv{mH)L2qk&R~HuBo!+n6hTHZpA523-eKkst{HWBXYbl8PW!V){nojA6rjk_XNqR(!T0!p$9ty~`R1y9+!O9^D_8^%Ny5 zObe_b+fa+$4JLl4Jgoq)aL9u|6pqI?Vxb>?LU{O()*6RMCVzi%b>a1oV41q-I)%1n%!Zj& zvRB$P+V;xw9;r--J4#E{P@~65;@?&rY;S#3gC~|X-)o|Ir?{3}XR}CfMDFJ(ayQAM z_UO#l_xgWZ?0!e4J+8jIL%6{iOAngS%SqJFyi()!k$FOVRmQfbZ2;%T$r@U+6nE-i2NhqFu1ettMy3zkE2 zh;ZNpX7F53e#S2BT6N}WOguQ^;}sAHuk8x)6sMn~5TYeN<11vvgp)amKm7=^W0t2Ck(0k2`(oq;BOHGX_24~sQ)TS?s`u2w z>oGp$)kh%r*V03^V3?&AXt1&!+!c@{JJk;mL@rG&$*T_u_QQNK>2Aj_Dd8KsVEm6f z>F8C_v2M~&KY?C`Ow*UfHo2tKA3X`W!)|GC_WWfvNK5QL-7@m2mR{8LzhfHt9#}E; zZ>|koa-e&YgQi>19=w_Q_UP>xb%s|Wf71SE?L4PPF6@q3*Bxe@#Xz(#HQoOhpZ419 za=&Qg&2`4GzP_Ra?j0R@ChX)bQPu~c((AoW?{ z&3Gjjre>2&-bBW*(c#@PEo88#DF`~p?v$FHh zbp9M}ImW03sxC@h7FhXQhRDlI=O27hr^HV3`+1$Aep%~v7J;xliZWrJdRCTE8_U@2 ztV32{lyjharfq~z1&zT=m`%wY(*x*`pD z|4|QpIQ^(_-%WA<4{nTnCL-Z*l~8A_m0bd0xKOrAbaWI?dyEXXXD<3l;5yUPIM0?G zgyo&Nq_svFM3zwoQ6D8+t_e}T1*eDqgoOV@!$;~@c&s!4ss3l(auU;C;*I34ti?Ok zCvNU}nEIpj-(+&swxH#D`93RsO~;4FC)RjEWYZy5Cm6$GC~&)(Jac^FG4l`s`ACHr za9$lg49CN$)MQ|hEYfas@Gv?@q#e~x%{o|5Ut3bZ0gqijNGP(M33VCOd3L71dh&_t zu3No;=LFOK3(48ViTV_U$i`Tji(4`q_==~4V$+~o7x8_n>qEoLxOGN(NW$LfGBo9= zVPU-hKdShV5N#mp@i=#6<87&d&d|v$hqJ2bWEEDWk=`W^c?sb^SJZu?#EK7D?p8gfL8f5SMK@+Tbwe-R@U9h>uL6jV8 z#0I>URr(4<$ 
z`pt5JAS+V&AR4zC^-o+6!^TaVl7`iIZ8W)H`j5U6jPP6y#;bLLmn&H(5eF<@xac2e zVa(~5s0A&!4UJqa#qjh`D550Q$b7i7kxeLf?T)l(*hd|k(jfZ_%ac^Yi1hlS=e4kF z@uln_+Hyswg%C*Mp0NLU_DdUegyW!Nj1U%k+u^hv!b#Av6TtOcF7D;0zi#06aod8J z-$5r&GxBw{;@QJ;cN&#reZEmygLl7#FJMm(7ePyd3iA z2OrN#-&b?&i0*843Uv>j3`hx=w9$c7o@4`5zwKgc`!CZ8F$b?Os9UnLznEVLqzbI70KE^W{QeKq6@%mn=bJpW+flL>lJAVrj? zy9rFlDF{nR2UlY`V5XaJ4UrD9-vAQZ`_GKxERO(fUvrL^oNGLg^Fou&Ofrz|fF$;h zD%+JmRN?f$pVf43AJvRhXHXZ7LB~|Yf*>r^SLwa9$y-{=b0X$uqqzUi)Wv z+y(;9zT?a-sZjVX&zJ#(eYoV16i%KA9lJh)4(kZuxB=ig4Gn!+&h`RJA-Yfl_cPSD z1Yz$4b%x!k453z;^_M=FJCV~>;pYPQju0h}N7hX7TB%1%p#*NJe3w5ynTw{w8qVhY zd`sX}P0V3eJ@i`QeS9Q|t54*qfx!IC=|6x_J1mdu%Q+tKJbPj0v^t0UBc=A7$~-5q?>m58x^1w8OI_ zk_e|>0S5dnab4=-Tk409oE3Tci-46sb7)P%hlNS~st4vx^!v&upB!9${J^eC9c)3u zf7(6wan`lB>9#AaokHYyD(>miD{{3C1VKzcJ=tcZRojO5T-yy*fzj`wO77`_G#yo* z4l&UVx>M;}O?(uUneiyr)wQOQ(rhODT;i0+glv5vmVCXV)Uen_)j4TNNwM+n(`co{ z3r6@SDuF?Lbu4V7-J=07WKOQq&Ge(@vtb{~si~IBlOM-SY1crv%rl=HU4}+fL%mJSkVi zxsnIE)l$6EhFKEdx^1&eUp=pbD1+^@5wBLWCmOvlrSyIb@z4obo=KmtSjC`cGAF5D zGfpOs4j>M}s9s#y-f1pR6?YbaH4!DNTxl+JY}&j7GEF6Gqm!&Krm5Dvyu@<9K zf;SFry5+Re!0yKkoATHq%6WICP)jYNk+Wf1EU>AX0ZLkPGz(9qP@k*=@8`lgl_~D| zeG>&Yq}Be`UcFYXx{58X^`$!ef}c-ktmGmb{zft>9G<%R{G$i?vf+BI-e`%Oqnhvf zh(rW5=E}~it7JK_TbX6OBWlmI(d|>V%*pQ;ulPp{^F*wu%;~=@N3+5bR~SRXQ@SII zF98ptC*MA2{Id7+{nS0_tuAo4CrXm(O>FIRK8vZ{(ZD8(Z44X{D$rg_@-%rv{o4tWe zsvzhU%eY%_RAmSH)u3|)Rd@*yjSjjD?JaI0{ZYzzl z^w)So4%g*sMx{}65}zF=)5L9bS`X5K<#jHx`Z6u=cFzB$H9nDld`UkB$csPjU&pc|%JDC>m*yZPxxv3=^;?3I4+Hg5xuLHqPFT`@llVhy$Bh&aMAfZA<7&Q7wWM6XM zr=Ji{T|G_fgy(1U;BIH!n8E(@{M08jKaV0#2n%G6lRB#rZMO8uE0{S`U2D8momHN; z>@%@^u-hbD)#t!EU3z_Zi(4L2ssoI&|9R3J%L-Oy!U*5=oCWd=j3;F+dRnOPy()T$ zw<%CD)Cn}OupfTV>bWEuRAO6V6FqKJPG6~(O=2mF4oIt>N1c*-><*fjKjxYr92T!isDog)d#nHyB}!_d`AL?rLdL?yR1K8#Z;9i=W#; z(bjMl<+TDzT?Hk-vtMrZxVl?^ew@`yoD5%QQ})o!9nG%UayFUY(ijF1n4nFD+i#`lI7Uq~S7W=wqE@lbWun;i#wY zMWdbr5nF=mUGn?&KQ8yavtg$HL+65=*;eLi@*7Fk{bxpt@e>d 
zZ#rWCKiU7ZN&aw_LP~M{Yc^w0^6Jm43fr0^yeow2@?q%-1!qt;=G_strlEdx+<<<2*YLGLfmg%w=~vJcRW;^hLo~Sqhl}6;xmU#u>_t`r%o$yvB;CBE30&(m-lf*8{)1Rk@ zu&7p$uH(QKP9S@r3w|@RBTdLM`p=sa2sA1P0L?D|Y>DpOs^8uyH9P>VpkeCtJ9NlA z_alJWFtz0!+O!=MtIqJwXmkmvK>+XyG7Iy1fX6P$A3skA$S5KuGSV7>Dg!`Lbj}Xx z7Qe29H!Vxlm*U+345nH_0=6rxz4?xw0`S$lVWE~z3g`Zt{azspExIEIR9i|aGxBuj zj{tq88TnoST3iD_Cs14bZ@TSbRbG~-1APThEL$7DJg6Oj-{d=X;avg8Kri9}45+f3 zy5D0RkVyx!1r+(5m?l7z+?WR}q-p`cQZl>Bc5b#*goiV}3?>x5QV>2Kzzup4r7oZY z@oj&HN>cf8XQycwo!H3oROdm@QH&o;7cWHl)h$%l&j)I$*^&|SUVAJ0jtfFY*|6-r zLXO=M1T6@OMO|^f6~3i~K<&m6A{OIx;uC=?r;??M8>0LoUuP`f*@HL^EJ-$ z&U_>xr7sMAt?G8rPTkr69PryhDNv;xwCnxFWG1PD!_rB0`dZ2bXgAW1o`@4t-eg6o zT&dT4lHp=8+NA?fdvmH#;`R3?Rl6IyWJfOPsV4ZUk6!|hvg{{<0UpO>Z=}^69XN#(C z9o)?rl+qbZ@k_th#{Oc9EA+Mm1BJ9yZ>}UTvuyFP*P-1RNa@+utvCX&eTvqBd;RfC z8Z@W%LgFt6c;_=9C=F!~)t-H=^=mvshoVn}O^Ue-1Z6%&o_0YZ?KE!B=jb|030-sG zthtQ2+$H+D0~pe?YhvT}f_?^xJir#pN&A#L2Qq&9C8Mx2E(95HOse<&fjV@*)ER#J zm*AkhUxL9F{}4RzYs^adYh8!F+&1-VxLae8B}iS`>vd(iJLP&1Z2NbN?zDdd1*)>ta0aG4)emjV^O?Nhr4D!n z)A?(a|2Sw^mfDB{4iuK35TzqV0xn+<%g>e$x;tN|HT?{jgR`p6j%wmFD1TV{2EVuE zf^OxM(_S3Gz@7S7Ifz zasD#X_+P^Vs2DAcJqmmx%`K_uFxl zsEZ^V?U0z#2WZ z;-S_VWf%nYjT>S`mim}Ae(y)?<3WM*ad)VEdc0scnveLuSaVRkWhQ98z>?JrTlRQW zWI6@9huYmO8zGd5HU#}3SeInqm|n%-z)`)`ht$8*TMyc*!VxW|J16qd-!eqUv4=lek6X%n$kqnlN?Gj zMY2)a+p!-ojS5$ZH)+fmQ9A)*arei+T$TK*YbHcYaHD(J`sShdPl82%dF-J0wFJv}=uyCj)k*Sxtn@0=wg%}} zjN=N81bb&w9So0?(tgMq=S!hZ*P@*gS76eP`;*gQje&e=3eL#Mma&Wxlf5z|3*KLq@PKM~!8Yn009#dGX3zG5wUl$nTXcECC;uHcn%B-+e$n(1h>JNj0cl;?ROcuUeh zptVr;a7UzbimB~c>WXJ&Q2!H+n&F^Td3zR;g74@`O?*q|ijx0mn$jF2tyR+xmsz>w zp~kNiI^EQ4uP|UVr+^#Yu8j{_!Vg^9C@ntfsEhjy69imc3Wy0t@#(L9;Wxe;*X&Cd z7ru?Asd+_*n8SA}b1XijzvSo1RM*e$@EuMwJy3D7?Q>#f+D{SV2V+MYU+E^C)Ph(^ zJ&cekg1V-d&Yfw%F5Pz!_ykdQ@>-Pv(-e^5h!Iv8ZIE$_)DhHbzT^Q%EMjjsA-VQh z{HRZ-)|EWmd<&OIFl$G`b>-$o z>xANguvB@VPJQ$f0Vhc=$s`o42Hm-m_Z>ig%%n64LoUac#&`ygkmz0Yh>JH`_t>O4 zSKdubH0owV5l!_*YTbb+zKWb2i*G!>%C^~!aMGGLH@@&UW*l|&a3l=3|E0!{2wT2b z&yn3077>MWWZ+c-Hl3$Wp@Dv<|OShh096~FCXHy;^eAq 
z=9SIyV3aZ#EVi4Sp|0X3&w-t&;l#28`RH%&D{!_bUv+pgmKL&1uK!D@>keB@UR4Xa zZBU$hXhJfGQ7MeO_r9Iu!LSd!wZqn6`saYpec)Wticr4W-e)A*$(}+ zbJEnnkllnWqj=S=R;Q@agLMa(=aY z0W|FH1a`%?cPHovG)e^UbBIno9~IGb_yYH57@wW_Pzb7-2Y-}sjLnDP<&yc zzD$I5oO;BcJ3ft{M97;^a8@yi+s}qpvr&L-Kgp!Diu5{GmeMOs{_I7I#wN%R75IG1 z(rV_pWFo#Qi2Aepht{XzcgXC2b6-12uIBkWKBi62r$xE3^NG65?V@BQ9=#Za@`!zR zb9e!jJ2V=pq(2g*6P?uA$Kw}>A`C3+j*z9TZ}2tEK5lcvbBRd=p+)_%q+Oy{=Q)aF z+lx}@Y)zRvwDBtW-4LePS*q?@3Xrc4hKA!m79Q9B9$kYlIN6VnKU-n;sd7@c0Og^{ z?vHXVR#&k%>bF&q;301l=^I-G_5D-8G?T_uQW`L$p(DjD);sQQ2ue;!BQ0({IH`JK zNzMkB_suIv8HVIj+HbcbG#h(S6c9TC`-X}0EuTVNAcH111}AnVv(~NR`h3NUTiQw{ zeD<^LkSTrp-r)r*8U=)dxs^FkQ0e6E4KTwQ>A}ZNeYfjmPoJSa_(Xxjz_I4|Elu(m z9`{8Yl2s~V!hX6co`M1cqG zjLPStW&h@RKET0#I;K_>Dg()CPo5i;TWBFfO5TU`r)xAOtf)*jUHB%_&KI%y_2v9B zZ%Ym7e&e~C`J;Y&_(JJ{n#!&7TlFkXUxYG>x$Ie>_+;`)DKG!sue+(&J#`;PEN46lB;#QPsP?=K&gsWH zRagBth_I!UX~sZ*tR!bHA*bMmsdN{wt5vU36q2Xed#;_uJSmG~N!F}AZGE7kV)lfg zRS6o{BmFj8F>nW*(pcnHUl3qmfjTCBESmo*EbqilP1AsDMTjBW-+X;zXrx1AHpUSf zY5p=rc49S`-6dk&Oy0}9vT-i)xs0cZeMvJqXmuJyY=MZWG zbu8=;oaUwM1oSBGw?7aYJYDY%-@pwE0YHMM$wFn?#cz9Jhw9Rvqel5xt^k&!Q@qoU{^IaA!}0K)4U#X~Rqn^bfX zxAXE?`?5E^EN<-g4Gt;IK zz>QMJ$PovcP3$Oqev+g8O8bHL@O$AzhK|w13*{JDMew-QZ?=~^@?xbkz1*Z2`kr~Q zqlEmBzD~@z#H2=#>6p&&6>@3B=@Ge7iyV%%05*` z4A#x=KN_r4hj${?;X)d40xtz`6;}ai$0CJJ_K6|uAIr}+A6Jv+@XxPl&`*zW#z;)-jB<@2GcZPHtIRww|YqrvYB_ zpT=Uh5`OCO_-A>qF%9B?){(3VzOh-#jSMdyX=}x5+< z?q}6I8y*_CydlmJj?fXPUGjW%Nz};I)D)hrWby}es$KJoEEl*|G-g5++r;hOII#P> zJybn06{4>H{4bW2w@MAueOXK4xYqmKkQU{Qke%hPb4+0lv{@H$)=oS`6KHMPi+EJD`zBU z^_|RL27&F}suyZ60Z;7*uEoehvb4`KDseGdE&$@ayoDcIqfHg;#(Q|t zv0ulII9f?vUGnSUC3(2X^3v92KVg5Al8*Cyv!Rv;ojFlU}}0+8v2)*AXz8> zT$y5ngX2>&ONm*GSd0e+-4CrzxW(!68sKNI9HjgR$*?58@K$cx{Bw$N&;U)_!j#2J z#*?X~9GO=`>9e*n@vO3FXg4LE_$P;QTd|58U!Qz))qf|Hd^Dnf^0axLej=XpXkV=i zR0DMvfqN~7dx$C3j1LbOJKGQ@H%{t;#m+X?J6KTz@&W=H+X<o`MhKJV4ZU_$ zoJFn8>cR-PadqpaxqZ>r>PdF{fB2mK{OGqd(|N7e9eY*PtWo`7k77|-*7G-odMxWZ z3Q8^kF0v#I1nqpZjSFuhw=}B(oFQ*|yFJoGoQ?bjC}{o`oVhzoJCIRE_);k#>>pIb 
zhjp$rgyrs`m_Mtr)JmYfJ}huK?b^BW62=0hri79`LTAe^eEXHNb{o{tlmA`qU2lOUy0GT!3S$hJyKo9BC~4!A+-;pNf>VNlLq&2IZdsBFf#@JwICtTmJxfs}<<6 zHA#qY$7lTm?3|1a6*=?%E|8 zfF@>th}a|uSyJY5lxD2lkeKne1Cg7)J=&gS!>-gi4%B2tlnPI=JP$ifs z5-uAU6bck9&d%pCxj4SJQLm*<}VDAIooK&|1=d-UBzwHJDV?dJ&O=!+DXG1Z+ zItcp*{pY!yw~Dkc7!fMo#m#^0w|0_Yxv z@TniDTf`33)tA==7x@@k!OKwYmd#uop<=)6Hz@K8#)ivTF~|6hU7t%{OK^+7!BAd# zm0S%iNlpf~-?|ScAju3srXMj44&e1$c5)wj)*{|8hS}j3Va8D&_4eV}2R_s;PUg(E zuDxm`Csuo~y(SaPChyvAUIcFI?v>0%rqY_!;7+eYqBy66Xl1M*JWn`WplH90V(pGS zTg2;CEZ$kvD}wPFH;e1>sjqlrCFhtF@+Saw#WCX$c&fDk2o0196dGy2ay+r3$0yGx zZ1s6Bh9^E*1bQa)EY!==x~l?>co5M7!yW^vawgEQ<-8s$d+VvK9u+<7Z7k7~;w9Hg z9Zc{pPAz;*Ia$W=gX+I@f1M6Qo8P7up?t}-J7|FQx^_X*oR|!Es&t5wd@PblR>i`? z7(dqhJbd>XyHG;oUMB;H{F<_~Z{0kVy}Mpi(KA3IF5*^unNa2!?%G|Wlf)O`j^Jj) z?{-}FRsc-YPTwA#YL3XUikPhf4diO(&R5x!A zN!thbX7O?+JMt{27uMavyrs%}XGGT=AJ6J#y(iIhdLp?)dmN+Lf6>nzm?|s|{8E|4-JPUfsqRTDJeU13(0RjUi}!h^ub47rJ89uiD-%I1 z5(KiplG*#g@!}AGXJG)N$A>AB4%GGbAbB2e{=c%}ov&hHIl8g$BuF*?Nb)>kle*r5 zI=I0R%IbhfCi`88Dk}{FLCrq&XZugZyxyi(>>YOam5pM6Y!pEAt?D?iAi<l*Z)S~;brgQ81i2y5PV$}0}u%RLxR7Hhl8KLjfeMtoyO-`%)3;#f8m1`# literal 19791 zcmeFZcTkhx*EkrZsVJbRpp=L-8%>lB5euj^k*Xk|s5D6gjPyi7K%^Wz1p;jYLoen)Ah@xAaY~u~@9Go<2liXmZ-u%4+n??3|kR&CN}U{%xyK9G*xb)9Cb^ z{Gw*;kI3jasE5xzd*`XC>GvNKG$1;s&MDr1=u})%Zg|_G9@989G~xmGq0wl|%f$4r z-(H1;ZES3|cXS%+-FhDwJXBRZ)!0;@@$J=f9}SJm_wU-$`ukZ!L+Me`?FB_M%~+_@ z)8vTg&F=1d*7k_!zFno|`bHL!0U?#?8KZURoVcXsTomzVdvRh)O&umaDNWbFJTWyr zFeJj_-owPyFPc{k%&hFQatk~hpNx)<0fK-1`t{D8JLBUMZ)4(a8s7zfgaDJ5g%)kP`J_>A}0N3c63={A)&(Q1w64K{dHD!EvX^9zc4GO z0Pgeh?c28o1_p-?9rE?{EuX8cUvGg&z0kRBX5;2^@ZiDQ_wA6$p-xUtA>r>HK77c| z&h89*uBD~*)ZG&bduDh`Cq2!*80F*Pen$hMIn)!uqNFX)#J&oA@IKnPzUJi=E@p$6 z)b%sGvMgYR@DcICF(=Efey!Q~!QD+#@@W4%+IlL~?LKiXzPTaf-u*j~Q84}6x092f zudJ+YZ*MDW=`xwjjm^!8$*GaivAOvLDwR4gG|XTy*4EaiXJ&~+5^XK@{)1biqqzBn z#jftY`iAEAUtN6zgA0pG{ewfTZSB4N1A{{&*w&v7&Dh!b`SJ0|t?li>!QtNCemr5E zL?(}n;#ODJC=|-%Ulcgo}gV1 z^mMO+UUP_C2fplmdi{oOI??ekbK7{_?9nrcg(H#PlgOHH%7ajnO&o+8sU316|2=XO>pp^ 
zvyAB1$?>?M?RIlYd39BFmFaGPrT;;H&c(Y+X5G!nf%1Vs-;E5yb1s;c?*)OT9-gNL zeT%Sp4$gv#fk1CtxS5YHbmRBOppVoJOHp@$3PJ_wAE?H)3l7L)el1ykItVlAEx zN{0zbc#_)MySUBzICg_xybC2zI%fGFk#bIBYz5K@B*_UJt}=?UtlrmK`ZCZkV-pB( z!UpU}yq*iI!ar|D*Q>p6PavAa%TTVhXv6UM=b1wfq(`u&vaYm4^UQf?_6SA;a?L%^ zpjx%AY;8MQ<5|NvEJ@dXyRi$3z(U02{bF?wl6uIYKG9UCsJx z$6i13kgT)K6*{|UAGHhg;&N8vy6NXIQX_^LN(KAC?ov{ zPDlF*HD+Y$ye4Zgln{l^L0gcy1HixTT*#k|SnuNxPYDYex2z!pbDBM*iTZ{~CNxfMGLNmceurf&Rip z+OD}~t7jn}ZlZ0on$&9yJ~#}V-pBNLs&%`Vlx#g1(IQ{nJ(fZUshio)9JAqe2_o{$ zol`qAn?q?JTc`{LClDwOLIz6{o(VrdO%7&Q|RCOk&k)(IH_~_xc&&tGi>4VlBSehei=Ey+I&&>zxIcsm1}j@h7}Mzfk+Jx6=qjOD`cr(7hNno zT7J%_+UH`$CQLqIw50brI&abcK_kKMCDnd!AjC`CwCaakBD+pY0Gi*1aOjnnyAHFE zK=Q(Q^T{OSb^Z8~a$G65CFB5T&h93_wUhAbM$-lB4~DC=4F^|O0>@j|CKKIGaVpO_ zH=`t=Y~62=jh|xnG3QC9Yg`h;CN`l_re2-fMo6t9T7d z?Xr;cjL@5=VaV#$+YalYANb`=8?@Qcx6A{7g&_6oj(%(LoD1t%UnzfJ1&95D(Z3-I zVt%;`yf@5QWLQ3v8?!TBsVg>4QA**wl;(uNA3LxXVbpdETMD7a*6fxYt3hSXE}oMf z%N#YVIU05PJwIM^P9gwuBtJc!7qQ34Ka(uq?0EKM(nSqpEAQbte-q(nya!pHoZuiZ zefinDz^Utw(yHJ~-s_jY#(c|ceE}&^fD>_ExAKp>;xj@RF{lHLTKMpO-eqzqM!xnH z=vRFS!HB1wBj*(~um6m4%*$!HpF~#Gv`=32&uN{4$Zvf<8-y(~j^PVezE+IS8E|QD z;%4$*$eGru9^Jqz?qk|vz2$u}iF3&{r#I=I$ECRU@aR9c%0tv(Z;M))m}M;az0Zu; z{bdV--%orvp~<7OoF9T$a#5&;|L6fp^^=Y>f*=>T#)4`?@-tc@H`4<`6nCuJZzE zU|j*R(IXzDsyDYc@xJGW9nQORs3$1)M{$kGME!$IA;wb^iB8k?^7Ad8WiFG|3NiG; zTa=i6=o}H!wk}xDzBhnspA}J;pSc;5eR*tScYOm8d$Ua2vc`@P>x>RCZ;OjkguWoN zMWg)8;!Amd{pcl-n%-FnEz@Fev-z*c!cQ}mAhhFI%Io%(8rw(TsqubC;tkKl`=%2p zwsRR{S+QQ3&sUQJjoaowExAb#3Oh*WN@*pzrm$-HrUOYW~)G2dwMOWzLUVk^jwats~h7qSc zV`4eiuPr>BF}|2?!o7HML;Xzn1{^I>3s8^$1i4?xNuN7j!E3@6%-M;9(O;WYwCVTmK>+}fsJ03 z>k~cSutI7pM)I+$pi3|3F}8aHogSPgQVXwh`N@QnA_Ygj5;J*@e?eZ6*|_#8cKbl9 z=YkGiWjgsxmw<`v@tY}FeB$wnNsMj2WbfqmX3Zh*XGhkl;g9;}J`@(Gxp2Xpfs!JG z5wcxdAR+b(AM&lqxA8mSH{IT^8}-$tM#`4ITU*W@Vu8HVwAdHuIJ90V5ynJ z=SHE6ch_}l3zf4S{H4)_eYF92NGNj#JW1eBXm7Nw;KDLpqr7@2e!qgdZl;i=z5@pQ{BzcEW?d;M76S& zc@^Ss`_eIAgSE|Aun|%iA7nI%G|@-q z47r)>S6liD2}bY1Q4+t67S4tALghK)YZ46c*^cH`E1eNKSlynfsCI)zt=w>Eo}x^? 
zP?qvSL;!@*u%Gn4!VXz8JvQ%;d+h8eCRxMw)CZbsGckjSxx#2PT^?`T{<&3r-*uLT^Hm<5a@?%- zrIk1{p6ZDmXTQ$s_|!TbNsBkFjo?Qvwz4D|qQ3T#wNMyGt3bd5=Ct*@41|}6>{nP1 zR1Rm=-hqVFK6?@zLvONQGJd5K!+KRs@NI!RyruTFPvtE%I=MeaWUgBo*gz|=s;n&A zkLI}9n%xJXUn^3tQ*q1WTm)-vUt+fbo!Qm7v*x%a)tG<&`EF~u_}43zFTf9%yYxc& zH%GE0qI3o*p^FQ1)gxJZ{kut~TIA%3_QNciNo#_j@8Fg6GcD`7c*v%@P~*dx^w2kp z_v>P%%QZ9DA?4E?pi^&eR%w5|n^N;FPbAanYtpu=a%aOm{LO{)m4?d0Cj|Q^mPV!X z^>Fe8t~^3e*9S80ewpW)zR%!q877F2YjZ-lreGcPz~C%ay|fo~nl}bx>l`%8wLa;w zPX%xUySIiwiYQ%Zl?Z?SNod2y^~VMEW%IoLoN@;S2NCwIF9YdYO{_KXpYdEp!d)iv zvL9^c*6=Trp`{}U)0E288{X^hF3{t{Cq;bA`)Y@BxImXYFE1&eQc`O8ITFLr?btI-@>7e=pOp z)`s`-+Wa}@W3H3b!o&T^2l@=YJ9-3S5O&5H>_~qXT&iRwTdJ%6RF@dBMc< zh+D69fR3BuyqCk_xB^x7b4^Wr7Yo}#AVYPq_w=?gAkaZa&b~_r%sRO)!^3qOXcb~`9cB8x~9XzYG%2mbi>eIov zjB4W((p!6hh2D(B*{irw)OO(*_gd|lguYW$uFtp6z_%~IFLhP~f%J55zgBwnEy(uc zwPv9ul6lHWBMilEgj56Z&2gVM_oBVc3ao? zb>)9E)H=rD-!3#y*$HmnBu`)ETDgvFcu$CRHvs<@KqBZHpC${RAg2)`w~q-eF}nmX z{^{cvK9S%dS-)`}Enfi8QRnvS%|?6r7!&a*6@V3k{DpOtaWhA{YEXS2y!FYwk+=(F zskMrlOx@3%Q)Aw(*!AhpP|@WQA!%f#swn;IOTm)bQHL2V5U4l$$misje1Xm=b2v++ zA6HAin`aJJ=A@g@tr{5F4b}{RVJW$*E+?l*TLbC3Od@iKmEDTl0)cMnJ$S9OQ0kl~ zJodiaIE>mzi*PTMn!c^ox+8dHoZx4vvST2S%>T3xf`XqDId?eI1U~5sJD9+QUQ9n* ziadZl?BXd}r?)Z7G3VH>np?H^PM%tbsq!vML#$bdw|P!BMB&nvoQ`0p>Q14Kss$;K z-aHRh)s3P_IX+PbS=AMptyt*X|Em*tH_dRt`bts9y`JqZz$jq1((+ExZ<>J z6Sik1MTqP0kDXB05AW139l&l)W(R8I_=@OmWO6z>dprVP&OJQy3IFK3*xq`bNDtJ) ziB9LRKzhB#Ra^(2;{-ojAWH(n`x8h6w@y-DPHU5a7972P^#pa-`#lIdZ@?{YvO@|q zfI>qu0BkAB4ta41!+VZ50DLKw!1a*}!~5wfnXT>r3cQ(vl_e7Zz7+xlP}4m7~dRM|qB-8VKkUO_giJD|cFr$ZTsn41M8b zohGFgJG`pEGbkm4T3`-VnO8j+#OEO3g#@_c1GtfZP;0+KCN*m+O8GfijEN*lXTOsh zz-zA1X2NeQ4_%dEOYWg7Ep=sWc`^28@wFg)h1p)93 zz}@;GM(u=#s6r0XGhYE*!2j|(-e?X$_{@=>6y=;Kko?h7}G{VRy(~Z^Qtnl%7C@30hYdw z1|X%iTJh?Iqz6wYweEP%NK#+^Hs7H@q=U3Kw}T!d6Kou>+$_F zCm^5B0$=wfYZu>}BIR6A{lPm2Xl`lXn`cPlWe>Dz_8UYc!FI3RQNWeZK6xj9Du08& zVG@4$_gx#wygj*R-%(vM8|_Tk{cPtq+RRcKxi+2f* zS2cH=fm8Llw_&%-(_v zer4+v@33V%!Yt3U9vd_QkYsY^bXd60y5?Q`9wv3_H2SxiiD 
zF{JhQ$^KioqxDx(U-jR$8F-irmZDuPiDbN5_z2pyE%9u#U_cpcj@xIq3#S_ZUlPC9 zC$noe0KH_4^3ckzI#R!TxNGc4h)LSsZHWv086FLOQJlEA1D1|~T6_I9kFIxMP0QIV zK{!D`?U!wzd~Ddg-&z~6hGQ6;al!7)>`TDsxi=<8_c>$@UjnxHbwnVsVV}P4EB7bCY{2o4H?QWyy5`DrF>hZGbdAl0XIJuK zq`XJn1n-n4j7Ud8v!Pd|bmwC%g)vc|fv9dO?X6=aEofOfQdOK*wfC4S6tKH|0Qj2f z)NgmG>bzsZ$PzOO`Ki~{2smgsfK+%^v`;&wuQt-Y|B`%>TatKo;@iG`{)f25sm6z~ zl@@?cmNxG)t3F1h;4T!j|2hDW5)`CM-)S9gZ$hI&U4tJ30tFF5p!#%L!r)`aU;rub zF0}lQdiaHdb-L4hHNH4g)`*cnz}9_X_%EgyuEy(3+)p3tFKD;4VTZgqDMP(BN6#XM z(#o5+$Zg!SqXK2%6I2CmCnWxxk@*6%(GW(^65OP7O{dY|#X0v9BZ|CZ7miv60WwQ) zvgX%jJ5x75<9#dOQrkb;{VAnfqahA^5I-+HAoHI2@FsTi2ZX^OvCZ|!w39vwr9&q|lKRwdAVL9?UPF+ptLa1} ziBh*G09e7bp!vaT(o}_RQT+TzFr6riq<;a^8BfE}M}^rfN6&BGz^YCxZ{X9aMEXwv zzO#ivDvSaQGi#K)Noq%GFOkL5q_nyWl5OQ6RN6RC9TvZW@)BoA!l+33P9dYaEn4#f ze!XsyIndWKJM?|XsfW5=Gav5kGVqV$R&<-vqauoIeaHjNj#ts*2@wy;dUuCp?NirX z#x$<3wp}KtpE?xC`Wd-c!#Co94P@=cL}~Ul0IhcaiX$3g3e=p77T2ivNsH?Y*B5V2 z(RlYVgIqSFS#b*)gD(MneX+&L$?6nS3$RgldMEP5MSXs?A?62G!Vni+jePlpM;)SU zZQ8N@Nf)DSIf0kzT@4mF-a8jB9JNyX%2MJW9kCfJNm_|rT;b1qT#?zSfC#;KM9k^?qTJey2llkG33NK+&O(fko>vGjP_84;2`L~LG1Q*TgOng3#SE7BD1`%@Pnp$fbcEOwR$ z5s7mE^F5$-tIjN7O}A`21WtuLmbpctFcWLVJT~O)n%#7Kw>;&8@DqSpBr^&4P+eFp zKdpmmn>?04sojf6JR<)x3I(RzAT`xxv-&VgAy8Q zaN)(DV3zGtoWSVJV*9SA;7xinsUf>~VfujvbYgdq-$DAXh)|^MBiH8?h%yiAuxr$i z&*)S+;$$j(_SM1>yPYV!gcbb5OHOt>kvVG|@%j1Q46-^9iG4WY`uwxF<#q*tw*g*( z=ZhV|3*9D5sSTZOMC01sKNrTPO!a_5I0OL%)okZAF`SOufGZ?)wEp7E$56e)?vL!0 zrKmCv6L<@=M3LFo3*Fd3tstyW(1-oap{qb94HhnJdBri;@|)uT$m6ZU zh+KvHyQ&nP*-zkO3E-7z+j2I()q;Ody5BPs>St-+u=|<)P9T@Vq&{5x?_fr5G04Vl zJ?x!-PkK!H?~}ljlIR7csep@8=j`m1AB&!8+sPslRIEtZvy))9fjD+Z%dx=-()}5y z>SGlNYM-->oauM+oJY$z1S?u3s0z@Xq6qbK0jh`PJ9t+SHdAh9=6`$EY}>dUdO`wN zByi`dNtc;=wuKw2ECOZIlv7`gVuP~j#*Ji)`LpI71ZiceTy!8O*W;p5}CFaYpqKtVqbbAELPi(eE^oa*AZNTz{$8HTsm)d+q z@zZIl;!T_05Smyo%k54Fth*}qQiK^+e*d~vl$Pq*Jd1fx zK00lNxyeDow`DS3K=`lL@eyMLn#!stkxo|D64ZCGA2zOJ4{QojBo#mS83z* zTa8gF(UJu 
zgWuxaN2CG7x=}mDqkd^=@y2mfw=%DrDqgFsIqP&M)SHXmx$mH>@DdQ1fE9XtH_qG%wlx~(s+MXGMDUk)p{ecJtsRP_mQnpH|zr< z6IUsBks9{dR_-PAkA_NSgw1iUO3bXJiJ^S-f`fnTWuAN>FF(<$T$8dFVF|>b9SxoJ zX@BcLdgTukv`QxdUb%gvtHWvweXhpTBEqDX!T_#*Ll4H7iyY}U&%D}Nc#-CjN(cc~ z;FfJaQg)gNH#SjG%1sM75J2##=8ey0v$q!BK~45fzaoT00Hgrm zu7lWP$qg{Ze=+M5eKj#ur4adV$PD8oFMi55sDYaqKfM6hT5#!#dH81X;W4QtJyQd6 zqZXjIYevzh-^AFvB;Zb;n$6ye7z_cFF?Zv9kXw;3|E6TN6!kv%H)pMfn_8pw2V=ht zR}kA19OFN=Xii@ng++Pq*)=anl zpnw(J@JW{8YPg~P*o}kNcii14THmE(A#FOoLb_-|B?{=&=$DDk@|llzDM=qkfLB&qbP8cMEYK`{6wLiqJ>VcbP*J7StG z`gyZDR)_NP&UgIwu%nuL-Jj&2n=)d>pTjd8nCo=3ld5w2+he~4_!P97+P;cImgQE} z7Kr{G0`un$p(`a3`f%U-n!kn7=Dw_;k+QAA@h48f!G)p;=q^*xt6iA%MR!d0p3w9V*cu>MbGeJ0WFV!m%D7k_yj-4Bm|M1_*KgA!Kv!Ve6{h zS+TVrN52CO1K9Ewsqn;)YrY?0&=5R5Zo#|}`iGED)Z^Aw$Ft+ceR;vSKUAha`%{OC zG=^`+xdIsZ|6sH(;f0ud#9HkQ-w?M@<78c0aoDZLN54hvI(KL6NPG*|3somUI=su6 zo>tGp4tbwb$sA3kFx4Az&)dc*j4jA4!vbD?|Kfo_v_u8?;=>cvw4kAZBGLTmE5v8} znLl1ATAkR1;g#W|{|b);P9j{0&p8gXF48yxzzJOnRK4UnNAs$SmU4JApsx8n>m$bY z%tasEzFcL#&(V|dN$il4OXMqx37n9RS28sOr()^A$lK+8j|`zYL?F!eK{q{oVFBQ~ zf~LiOo{%0-HV=RabG-|zJ}v=ZHODZ0>9etk)_t#%KJ>A(jObq$8q&fZ%b`4_sA(^C z6s;gj`-AoG2m6^h5Myo!vgG_$<-X+$CwzOh=Hn0m&W7K`-S6+QRBOCner;1+w=q65 z`2BJES2Q&}`)r6*Rb{gxCd&WW4@Sm!{HKqqjY6fEBy{eOTygUYwo->Ek4g;7B$e*e zS9*naYiDY0X=1xGDy3T0qe2l6TQo~d5_=I?l2~l(a8j9yn_^`dWyylo00A!L#7_o& z*{497*HAOx4ALyAZr|UNd7X#ZoyqK$F3e1&XBoL>6Tuu})U^5&=(g)e7Cmg&o($LC z+!f+?;rw@ZtS33D1tx0L=cFUGc@9bFt>m@nI2AvNji2!UnAqpDoiL^OUk-Y%5Z&-?Z|$??{?U*(1_$+1OX)WFgcE0 z47=lc0n>7YH=*arm7#y5k0jWdX38aefVS^}Bqdlcrg2q{+)R05qdo)(;b+Npg37H+ zmdo#2H2+B!XGgyLKR)3P-VS@{fhztFzW_q1q)`irMBn80(i_+iI4A)9l$~{c+`aDP z(CFfmd;>MDT*k#BE{MrVnb*K%eS*nHBM?kkI!%L59vin?)Iq38?&>Q zAL8+H(-7F*{+;E;!9BG-1 zsc+ePNqrhq-%tc3!MD7B#C6yR>4V+%Nw`-X&v?M*!AHNO^@Ipl8jwnE2b*89^o_o! 
zXX&@0h~b@ToUw86aP1zV-`_ZIRA(QlDyw*b6T>T9J?Y5v>if6>NdqBJdaPxpFRH!N ztSZ~;Es&quh=alQ?<2%}A0JM%fG7?0msThN8-dVA?QRGnih1!09j+`g5NVuvsTP2o|#;FETqXbzx{*~3h}L7AJ_>TxsToA=<|+kms9CbD?QdU z)5lvHA4L?X#pI!KUiA` z8UA^dY+pGf9H}($m=6%L_puZpcl*rKlK+NPDI~r9-~67vAe>s^KK&!6Ip}|XaOkr2 zX<6a_CU|1|xTA+FXQB7T71zhWj_2q;UTkIG(*JunZJZiTD~~2C-Q`4L073tSB?=(#VUd5mqW4}WL9PPObpc?VYCga^ zN}Ud{KT_E{xkgkivY6^)NTKYJlOvt<+(i--GKNIzz^&XLJCRUEr3PJUbeTjz=;|JC|UE8CCI_`v{M;0|puZQj$6MsPj%hj3EU#1oz! z&0VoCnRUdyEo?9SAR2zt_ZD1libgXgv;6yHTHp26@`uF;M5eO*R=Z+4i5oZF#^Wqp zi)*7{uEEr+C7WA7*Z7vzQ5hHAZGL-(dDQyIsCuD_>Ns_&r&E)(S(us6u0U<_AO^}= zuN1THkX%+qi@9rAe&ca^t|^_t9vi1}+xdV#b7|*JpE<2eUv!_d?{&EPDw#K6l*N@j z^6qSouk=wVN#RsR0AD9$nyKs8#f_1!(S=&p@zt!;J#3#Tvq@ywr(q!GiL}l!s39!%SLIk(&_GBRh*6JJIA2#)}npUQ6A6AV@#sqS%~{8J;!nTmDq`5$@Jfn9 zjc+$ZMB~{SYrfNO#(TUugElJg2Zx{et1FBh9QK%nS%u#?)U;hi`j)>pKIWq*0;0(! z?uaHX9H{Ask4c|jz!8uQLPscIKf^uV4wxgy=rN_5lf+pX234YDQX(S?E2%8&j`IG> z0P3WvNw7~Pi`T6Dq=Pxd@Z8PIDpeTe6p9VKz0yNAQOiZ+E6s$I@9#KoBJfUd+e7`Yn5vV;q{I{@AoAn|Il$7GXg zOO}1%d*)zdFmWYnV1{=+iVNaPr+I(x&cV2%^61~iKSr*{-h?oWn4ufujOT#t*DtGR zsWNodjrOpn>g0tFXtqFXMm#W;h;ao(P-*<6wjPSJytXW|RGh)IME?tDQPOX2Ep(6H zO$`CA+2UYMmZd9H)yVr5wZSCS;%=hq-J?;;!`4>wsZ5hTneUx)jOc}4C_8Q!%a$po2*?5mvo;7NUN~v4D zd^jZ4cOpn2x*f5shavREny`RThsqtN*igQ#|l|&<$Y#AA7Xsq9_35 zYQ3TLQ4{?JfYo#=mH*N#34z!QOlT((qeDGeOcxba)a>0bwbTD{+k zsNTutMU)50af6Q~ekbyeS9IlJ#!}Sfoh;Ed76yOtAo1P9of^Q3M+8%IEuJ8!B5YgZ zv$3G}JAFSQHLEEUwqO4jGx}r!;Yc}dvHYU~KxOc;R$#HINlCo5{we)hSRis{*nMu# zjLZ*qG;SxY5BU}v>OWD8-oGDVG4_$f1cp`dL6rs8=!@tKtD z4|XJGoccy6vP^VRuHrueda9%{IRMkPqiH1`al>B}-2id8d9XQ0xq$Ti55H9|?OT<$ zrpcan=na_;bq9ih`qyFNvp#u5w0a9PcIMCwf9N{!A*VpffzgbfO&t^@yU+KAtINPhwJSd*6)288P$hG%&jnu zhGwsyCil*BlRiAhxPJWIDy=?}r0#)yyY5hB7C7fnrWuL^+a4zC8aNDgQLvI9<6z^J;FG|hCPEUppxqoZxxp89PpnC*?NT0BPQ z*s(IH@m@jGFWZ@fF{U4mQNEznC*zHQDZ%a>6Cfpc0JtNcKl&!wrq?$@v?yVfz?H;)%AVQCQ zwI^yW6?c@^E0hGOnPYg^EKw8LFMn6f=jqCBTB)?yXjC0S8ZxJe2ui zV4UhDyPp}%*EM;~LW5RT2`n~UGtowdWLTJqNeXV2hnjliGtQs79FQgfm0 
zOscNXG;mNnwT6(IbmFQpxmwhxz%2S0-9b8~=CF0W#WS+Ba|~Z3s(Kf-B@tEy^J(YC zpem#404>^5idGV6rLADW2$TpdP#^Yba4SCNVPGB5*y%j{?RIrD$@H^_3wNTRg7!_g)ys70X_UW{8f z_|lu)Yu+kqIp`j4v|w@{o(HYle}^$)2W+a2&gC-m^q9!bupX-}9n3V@_hE7$#Kko% zWHNE3qo7)jUMhz^HMVE1tDWUCagn4=P09txW;=7IR2^qWYL&K=J!6FEUz@@;mK}K_ zoPDf5O$KQ>Y_MH_S9)qd$DZ+FZqWWPosD%^9M);u;x`_|yOBD2EMlWNl+@WTFwJc3 zX}}*=QD1&q?}tgvUv3#8)p&?!j+0$ZrW6x%P}t5oAA_2?uU}aNxL94nmOq6DI}yAs z?c!@tvr+}7U)yn-)%-~kbp#jjxMl-)ipvWB2=)zqoH(z1?d)&Q0hCS5mW~U=hOa1;$=x-KyTiS&aDU^8679;84pOE>Ow)WC4pETUSk0T6JS~F?5>o?B61f z%6kfZ5;zxN##(1V<-a~Q)Ds02U$=U1WgvE@G{G~G4`x?rz1DF$zlyanaG9t_qcQ=>bsD#5Q-!vh_<$m*tns#p;35)T}^xpUo zi$0m}->&|Pu6JM{@z%qv+xNmsI1!+^el~`hQ^VW8GyYG#7eZa79UM zbl-B-^3%SWv2sgELzRRmaw;y=o44F>S>vg8$1bL30IQ!?sqaE7J?%@kutjVoC?7(U z*4Iv)boSjyKx#ll=t0DlS@}n(OdH~J7tHm$nl@8E#`Q|6o;U9@_aN%`0r|Zd|FK-9 z$Jv+FUow1@2OFD#f6cr&o;oTJY@de3tmQ0E6Rgo>L3G1EB@BpP-}-&53>7uYPe=Lg z1#d5erO)D@1uD4BRKMF#W*s5A7%3aC?v@J1z`D>Hpy4>l!zf~5eJf;b#hhq67QB~^o%o;kXZs}fw zK>I_|JuGV)73PBS-)fVyiBYe<=LQhBI+ajqbnl;n=f&Qg0tWqMYYDZ_ZMi*XEYCaN z|6<(qW;?2OH5e^U?{m3avzkF`$l+#v{5c+X#Wl?B;9sGgl);8B3>nC7Cb&&mgj{FW|i zw~GZzry7GoZ~i!os+Cm`;^qOuwT3Z#!TE3Xw@Y7sr-GUevVZr62O;uLQTTWG-*w{u z;Gw6+V?g}?>~B`P=$Si_qPZ@)aLWDw&IEh342N?weKY!W$cwPk0mvC3nf3Q4HSQe8 z%Jy*VA10arWj9VUPf0I;H6X*ojCax7H1v(t0Nu5ME9sBC#y{`a4Px zqOB;~1o)GYnc?o*o?(FdF>{f|hl0?O^fD(5av)*tEIMC}R75S6Q|%}mVmbr8I$ILE z-RIvs+NXdW?e+`9j@JN$c=g8Ntq(|soc>Dc=!|IU2I{egYyXBJD0tXfp-Gr6eP#f- zu%(9`FxAy=A$%_|dg9@_Md@JCQ^S^$L7@2zE_8XLSFfO_Xy->^po}l))w+e-4XVGq zxesBN0Q1*_bAv$KV$^q-YA}MDzn}yH9n~g_2l8ss-xeZ3^2aez`|CEAVT%2{ARTe4 zLbazae#yaX1(&2csY+>JHooJw!QXz?5_fVz!YrK_h#Pnc$9s>T7^W-#l;|wyZ>o zEzc_3i^4sKhuX+ppvJ(mWk#I{z9+1Gr3;&*nry@hPq#{LCx0oa>fPlEgqf5`nJJ9-x>Nj3f-C8oC0%?oj`f9nE~^4D2?5g2kyDnI?(AeHD3O z3<{DwtiOY54}vk@`*?>xUGkz)rV?-!%T6W07O7Sm4_x|cJV@W}$!h<{xa>(!-OFwXNW{17UDI4T;EitbHTiA@#MNOMn&Fb}NLIj~MkMZH|kyM68=c_)jL|67dz&T4JX~!g!g_ zTvI4eP7Er$>?Cjv!LGtOki}K4jNfD~*Gz^G@^COOUivdXU&@RxQFhgUb$ahUUS{F2 
z2DBe&h5iGh5ePZHwMhxWQx35-Nf>pT6Jx`*ME_+i=~+v!uAMAP?#;7e#$-F6ZJ+_# zh-*K9q1Oj^`u`Uj~dlK~G_yCMa zIUyYAM<-i>YZwp-j&@?NJ)N{ntV_kZky)yxRRPYvkcww=D0@0HJc=RXS?*221tHrIuChQe#7F1o;{?@PyC+Vy zMAUl*jFX-Y$AYQ6TAOPH9zDntv!}dP_^JB6Gu#2_Ps1QkoF24icpJvt+yGl6)$8;a z)dMKq%-1-t#cdi)FOer>?zFG8%UX>;qjtiX4RnczKQMzjvbta)1SAXv1xMh82hBWu z@N4Hb*w5CcPr#iOP+Ux?Hn4{Y+*acc3;DicrxlMonap3)ap&BoE0dQTA z$NB%q2;?21mcn$!fgPFm?@xeK_|sate}6+zHUVG1Yz`h}Y66_w$pRnhoT}sSXvW{~ zxH?8i_Q>1T2uPAQI}p(TZFEwgcqWqzY4<&4B_ayJqLsstms|nS76V6uTjd3ThAr@A zHl4&sS*S_Gkq~d4&W-+t9Egb8uYfJ6Z7<#RKj@cu{>{pg8Gi!&n5WBNG zrjWdi>KkP#IhO|i8U0^F)-VLw65szbt3T}X-0WEmj3@@x64!{5l*E!$tK_0oAjM#0 zU}U0eV5Vze8e(8NoH

::check_split_root(ReqT& req) { root = std::move(new_root); // We need to notify about the root change, before splitting the node, so that correct dependencies are set - ret = on_root_changed(root, req.m_op_context); + ret = on_root_changed(root, nullptr, req.m_op_context); if (ret != btree_status_t::success) { free_node(root, locktype_t::WRITE, req.m_op_context); unlock_node(child_node, locktype_t::WRITE); @@ -236,9 +236,9 @@ btree_status_t Btree< K, V >::check_split_root(ReqT& req) { ret = split_node(root, child_node, root->total_entries(), &split_key, req.m_op_context); if (ret != btree_status_t::success) { + on_root_changed(child_node, root, req.m_op_context); // Revert it back free_node(root, locktype_t::WRITE, req.m_op_context); root = std::move(child_node); - on_root_changed(root, req.m_op_context); // Revert it back unlock_node(root, locktype_t::WRITE); } else { if (req.route_tracing) { append_route_trace(req, child_node, btree_event_t::SPLIT); } diff --git a/src/include/homestore/btree/detail/btree_node.hpp b/src/include/homestore/btree/detail/btree_node.hpp index a3285ef35..b516988d7 100644 --- a/src/include/homestore/btree/detail/btree_node.hpp +++ b/src/include/homestore/btree/detail/btree_node.hpp @@ -37,6 +37,7 @@ struct transient_hdr_t { /* these variables are accessed without taking lock and are not expected to change after init */ uint8_t leaf_node{0}; uint64_t max_keys_in_node{0}; + uint64_t min_keys_in_node{0}; // to specify the threshold for triggering merge bool is_leaf() const { return (leaf_node != 0); } }; @@ -116,6 +117,7 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { m_trans_hdr.leaf_node = is_leaf; #ifdef _PRERELEASE m_trans_hdr.max_keys_in_node = cfg.m_max_keys_in_node; + m_trans_hdr.min_keys_in_node = cfg.m_min_keys_in_node; #endif } @@ -299,6 +301,7 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { template < typename K > K get_first_key() const { + if (total_entries() == 0) { return K{}; } return 
get_nth_key< K >(0, true); } @@ -333,6 +336,7 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { // uint32_t total_entries() const { return (has_valid_edge() ? total_entries() + 1 : total_entries()); } uint64_t max_keys_in_node() const { return m_trans_hdr.max_keys_in_node; } + uint64_t min_keys_in_node() const { return m_trans_hdr.min_keys_in_node; } void lock(locktype_t l) const { if (l == locktype_t::READ) { @@ -392,6 +396,12 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { } fmt::format_to(std::back_inserter(str), "]"); } + + // Should not happen + if (this->is_node_deleted()) { + fmt::format_to(std::back_inserter(str), " **DELETED** "); + } + return str; } @@ -527,6 +537,9 @@ class BtreeNode : public sisl::ObjLifeCounter< BtreeNode > { virtual uint32_t occupied_size() const { return (node_data_size() - available_size()); } bool is_merge_needed(const BtreeConfig& cfg) const { + if (min_keys_in_node()) { + return total_entries() < min_keys_in_node(); + } #if 0 #ifdef _PRERELEASE if (iomgr_flip::instance()->test_flip("btree_merge_node") && occupied_size() < node_data_size) { diff --git a/src/include/homestore/btree/detail/btree_node_mgr.ipp b/src/include/homestore/btree/detail/btree_node_mgr.ipp index a5b0317de..3b2383dd2 100644 --- a/src/include/homestore/btree/detail/btree_node_mgr.ipp +++ b/src/include/homestore/btree/detail/btree_node_mgr.ipp @@ -42,7 +42,7 @@ btree_status_t Btree< K, V >::create_root_node(void* op_context) { } m_root_node_info = BtreeLinkInfo{root->node_id(), root->link_version()}; - ret = on_root_changed(root, op_context); + ret = on_root_changed(root, nullptr, op_context); if (ret != btree_status_t::success) { free_node(root, locktype_t::NONE, op_context); m_root_node_info = BtreeLinkInfo{}; diff --git a/src/include/homestore/btree/detail/btree_remove_impl.ipp b/src/include/homestore/btree/detail/btree_remove_impl.ipp index 82213dcc6..6b0d78a5f 100644 --- 
a/src/include/homestore/btree/detail/btree_remove_impl.ipp +++ b/src/include/homestore/btree/detail/btree_remove_impl.ipp @@ -199,7 +199,7 @@ btree_status_t Btree< K, V >::check_collapse_root(ReqT& req) { goto done; } - ret = on_root_changed(child, req.m_op_context); + ret = on_root_changed(child, root, req.m_op_context); if (ret != btree_status_t::success) { unlock_node(child, locktype_t::WRITE); unlock_node(root, locktype_t::WRITE); @@ -476,7 +476,6 @@ btree_status_t Btree< K, V >::merge_nodes(const BtreeNodePtr& parent_node, const ++idx; } #endif - ret = transact_nodes(new_nodes, old_nodes, leftmost_node, parent_node, context); } diff --git a/src/include/homestore/btree/detail/simple_node.hpp b/src/include/homestore/btree/detail/simple_node.hpp index 1f4c30e32..fecc04e2e 100644 --- a/src/include/homestore/btree/detail/simple_node.hpp +++ b/src/include/homestore/btree/detail/simple_node.hpp @@ -229,8 +229,9 @@ class SimpleNode : public VariantNode< K, V > { } return str; } + std::string to_dot_keys() const override { - return to_dot_keys_impl(std::is_same{}); + return to_dot_keys_impl(std::is_same().key()), uint64_t>{}); } std::string to_dot_keys_impl(std::false_type) const { diff --git a/src/include/homestore/btree/mem_btree.hpp b/src/include/homestore/btree/mem_btree.hpp index ce606fc5a..4b3ea6f56 100644 --- a/src/include/homestore/btree/mem_btree.hpp +++ b/src/include/homestore/btree/mem_btree.hpp @@ -81,6 +81,8 @@ class MemBtree : public Btree< K, V > { return btree_status_t::success; } - btree_status_t on_root_changed(BtreeNodePtr const&, void*) override { return btree_status_t::success; } + btree_status_t on_root_changed(BtreeNodePtr const &, BtreeNodePtr const &, void *) override { + return btree_status_t::success; + } }; } // namespace homestore diff --git a/src/include/homestore/index/index_table.hpp b/src/include/homestore/index/index_table.hpp index 2bec275e3..08c36a767 100644 --- a/src/include/homestore/index/index_table.hpp +++ 
b/src/include/homestore/index/index_table.hpp @@ -78,7 +78,8 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { } void destroy() override { - Btree< K, V >::destroy_btree(nullptr); + auto cpg = cp_mgr().cp_guard(); + Btree::destroy_btree(cpg.context(cp_consumer_t::INDEX_SVC)); m_sb.destroy(); } @@ -130,13 +131,16 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { idx_buf->m_dirtied_cp_id = cpg->id(); BtreeNodePtr bn = BtreeNodePtr{n}; - LOGTRACEMOD(wbcache, "repair_node cp={} buf={}", cpg->id(), idx_buf->to_string()); - repair_links(bn, (void*)cpg.context(cp_consumer_t::INDEX_SVC)); + // Only for interior nodes we need to repair its links + if (!bn->is_leaf()) { + LOGTRACEMOD(wbcache, "repair_node cp={} buf={}", cpg->id(), idx_buf->to_string()); + repair_links(bn, (void *) cpg.context(cp_consumer_t::INDEX_SVC)); + } if (idx_buf->m_up_buffer && idx_buf->m_up_buffer->is_meta_buf()) { // Our up buffer is a meta buffer, which means that we are the new root node, we need to update the // meta_buf with new root as well - on_root_changed(bn, (void*)cpg.context(cp_consumer_t::INDEX_SVC)); + on_root_changed(bn, nullptr, (void *) cpg.context(cp_consumer_t::INDEX_SVC)); } } @@ -223,7 +227,8 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { wb_cache().free_buf(n->m_idx_buf, r_cast< CPContext* >(context)); } - btree_status_t on_root_changed(BtreeNodePtr const& new_root, void* context) override { + btree_status_t + on_root_changed(BtreeNodePtr const &new_root, BtreeNodePtr const &freed_root, void *context) override { m_sb->root_node = new_root->node_id(); m_sb->root_link_version = new_root->link_version(); @@ -232,12 +237,18 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { } auto& root_buf = static_cast< IndexBtreeNode* >(new_root.get())->m_idx_buf; - wb_cache().transact_bufs(ordinal(), m_sb_buffer, root_buf, {}, {}, r_cast< CPContext* >(context)); + IndexBufferPtrList freed_bufs; + if (freed_root) { + 
freed_bufs.push_back(static_cast(freed_root.get())->m_idx_buf); + } + // Meta is similar to a leftmost child here - it should always be the up buffer for (both) root(s) + wb_cache().transact_bufs(ordinal(), nullptr, m_sb_buffer, {root_buf}, freed_bufs, + r_cast(context)); return btree_status_t::success; } btree_status_t repair_links(BtreeNodePtr const& parent_node, void* cp_ctx) { - BT_LOG(DEBUG, "Repairing links for parent node {}", parent_node->to_string()); + BT_LOG(DEBUG, "Repairing links for parent node [{}]", parent_node->to_string()); // Get the last key in the node auto const last_parent_key = parent_node->get_last_key< K >(); @@ -247,7 +258,15 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { parent_node->node_id()); return btree_status_t::not_found; } - BT_LOG(INFO, "Repairing node={} with last_parent_key={}", parent_node->to_string(), + + // Get all current child ids + std::set orig_child_ids; + for (uint32_t i = 0; i < parent_node->total_entries(); ++i) { + BtreeLinkInfo link_info; + parent_node->get_nth_value(i, &link_info, true); + orig_child_ids.insert(link_info.bnode_id()); + } + BT_LOG(INFO, "Repairing node=[{}] with last_parent_key={}", parent_node->to_string(), last_parent_key.to_string()); // Get the first child node and its link info @@ -272,21 +291,41 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { auto cur_parent = parent_node; BtreeNodeList new_parent_nodes; do { - if (child_node->has_valid_edge() || - (child_node->is_leaf() && (child_node->next_bnode() == empty_bnodeid))) { - BT_DBG_ASSERT(is_parent_edge_node, - "Child node={} is an edge node but parent_node={} is not an edge node", - child_node->node_id(), cur_parent->node_id()); - cur_parent->set_edge_value(BtreeLinkInfo{child_node->node_id(), child_node->link_version()}); + if (child_node->has_valid_edge() || (child_node->is_leaf() && child_node->next_bnode() == empty_bnodeid)) { + if (child_node->is_node_deleted()) { + // Edge node is merged, we 
need to set the current last entry as edge + if (cur_parent->total_entries() > 0) { + auto prev_val = V{}; + cur_parent->get_nth_value(cur_parent->total_entries() - 1, &prev_val, true); + cur_parent->remove(cur_parent->total_entries() - 1); + cur_parent->set_edge_value(prev_val); + BT_LOG(INFO, "Reparing node={}, child_node=[{}] is deleted, set previous as edge_value={}", + cur_parent->node_id(), child_node->to_string(), prev_val.to_string()); + } else { + BT_LOG(INFO, "Found an empty interior node {} with maybe all childs deleted", + cur_parent->node_id()); + } + } else { + // Update edge and finish + BT_LOG(INFO, "Repairing node={}, child_node=[{}] is an edge node, end loop", cur_parent->node_id(), + child_node->to_string()); + child_node->set_next_bnode(empty_bnodeid); + write_node_impl(child_node, cp_ctx); + cur_parent->set_edge_value(BtreeLinkInfo{child_node->node_id(), child_node->link_version()}); + } break; } auto const child_last_key = child_node->get_last_key< K >(); - BT_LOG(INFO, "Repairing node={} child_node={} child_last_key={}", cur_parent->node_id(), + BT_LOG(INFO, "Repairing node={}, child_node=[{}] child_last_key={}", cur_parent->node_id(), child_node->to_string(), child_last_key.to_string()); - if (child_last_key.compare(last_parent_key) > 0) { - // We have reached the last key, we can stop now + // There can be cases where the child level merge is successfully persisted but the parent level is not. + // In this case, you may have your rightmost child node with last key greater than the last_parent_key. We + // have to check the original child ids to see if it's one of the original children. 
+ if (!is_parent_edge_node && child_last_key.compare(last_parent_key) > 0 + && orig_child_ids.find(child_node->node_id()) == orig_child_ids.end()) { + // We have reached a child beyond this parent, we can stop now break; } @@ -309,20 +348,34 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { } // Insert the last key of the child node into parent node - cur_parent->insert(cur_parent->total_entries(), child_last_key, - BtreeLinkInfo{child_node->node_id(), child_node->link_version()}); + if (!child_node->is_node_deleted()) { + cur_parent->insert(cur_parent->total_entries(), + child_node->total_entries() > 0 ? child_last_key : last_parent_key, + BtreeLinkInfo{child_node->node_id(), child_node->link_version()}); + if (child_node->total_entries() == 0) { + // There should be at most one empty child node per parent - if we find one, we should stop here + BT_LOG(INFO, "Repairing node={}, child_node=[{}] is empty, end loop", cur_parent->node_id(), + child_node->to_string()); + break; + } + } else { + // Node deleted indicates it's freed & no longer used during recovery + BT_LOG(INFO, "Repairing node={}, child node=[{}] is deleted, skipping the insert", + cur_parent->node_id(), child_node->to_string()); + } - BT_LOG(INFO, "Repairing node={}, repaired so_far={}", cur_parent->node_id(), cur_parent->to_string()); + BT_LOG(INFO, "Repairing node={}, repaired so_far=[{}]", cur_parent->node_id(), cur_parent->to_string()); // Move to the next child node - this->unlock_node(child_node, locktype_t::READ); auto const next_node_id = child_node->next_bnode(); + this->unlock_node(child_node, locktype_t::READ); if (next_node_id == empty_bnodeid) { BT_LOG_ASSERT(false, "Child node={} next_node_id is empty, while its not a edge node, parent_node={} " "repair is partial", child_node->node_id(), parent_node->node_id()); ret = btree_status_t::not_found; + child_node = nullptr; break; } @@ -330,10 +383,21 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { if 
(ret != btree_status_t::success) { BT_LOG_ASSERT(false, "Parent node={} repair is partial, because child_node get has failed with ret={}", parent_node->node_id(), enum_name(ret)); + child_node = nullptr; break; } } while (true); - this->unlock_node(child_node, locktype_t::READ); + + if (child_node) { + this->unlock_node(child_node, locktype_t::READ); + } + + if (parent_node->total_entries() == 0 && !parent_node->has_valid_edge()) { + // We shouldn't have an empty interior node in the tree, let's delete it. + // The buf will be released by the caller + BT_LOG(INFO, "Parent node={} is empty, deleting it", parent_node->node_id()); + parent_node->set_node_deleted(); + } if (ret == btree_status_t::success) { ret = transact_nodes(new_parent_nodes, {}, parent_node, nullptr, cp_ctx); diff --git a/src/lib/common/crash_simulator.hpp b/src/lib/common/crash_simulator.hpp index 98c22fe17..cfe8ec327 100644 --- a/src/lib/common/crash_simulator.hpp +++ b/src/lib/common/crash_simulator.hpp @@ -13,9 +13,8 @@ class CrashSimulator { ~CrashSimulator() = default; void crash() { + m_crashed.update([](auto *s) { *s = true; }); if (m_restart_cb) { - m_crashed.update([](auto* s) { *s = true; }); - // We can restart on a new thread to allow other operations to continue std::thread t([cb = std::move(m_restart_cb)]() { // Restart could destroy this pointer, so we are storing in local variable and then calling. 
diff --git a/src/lib/device/virtual_dev.cpp b/src/lib/device/virtual_dev.cpp index 3665f13b9..ac49f95dd 100644 --- a/src/lib/device/virtual_dev.cpp +++ b/src/lib/device/virtual_dev.cpp @@ -424,6 +424,8 @@ std::error_code VirtualDev::sync_write(const char* buf, uint32_t size, BlkId con Chunk* chunk; uint64_t const dev_offset = to_dev_offset(bid, &chunk); + HS_LOG(TRACE, device, "Writing sync in device: {}, offset = {}", chunk->physical_dev_mutable()->pdev_id(), + dev_offset); if (sisl_unlikely(dev_offset == INVALID_DEV_OFFSET)) { return std::make_error_code(std::errc::resource_unavailable_try_again); } @@ -436,6 +438,9 @@ std::error_code VirtualDev::sync_write(const char* buf, uint32_t size, cshared< if (hs()->crash_simulator().is_crashed()) { return std::error_code{}; } #endif + HS_LOG(TRACE, device, "Writing sync in device: {}, offset = {}", chunk->physical_dev_mutable()->pdev_id(), + chunk->start_offset() + offset_in_chunk); + if (sisl_unlikely(!is_chunk_available(chunk))) { return std::make_error_code(std::errc::resource_unavailable_try_again); } @@ -457,6 +462,8 @@ std::error_code VirtualDev::sync_writev(const iovec* iov, int iovcnt, BlkId cons auto const size = get_len(iov, iovcnt); auto* pdev = chunk->physical_dev_mutable(); + HS_LOG(TRACE, device, "Writing sync in device: {}, offset = {}", pdev->pdev_id(), dev_offset); + COUNTER_INCREMENT(m_metrics, vdev_write_count, 1); if (sisl_unlikely(!hs_utils::mod_aligned_sz(dev_offset, pdev->align_size()))) { COUNTER_INCREMENT(m_metrics, unalign_writes, 1); @@ -479,6 +486,8 @@ std::error_code VirtualDev::sync_writev(const iovec* iov, int iovcnt, cshared< C auto const size = get_len(iov, iovcnt); auto* pdev = chunk->physical_dev_mutable(); + HS_LOG(TRACE, device, "Writing sync in device: {}, offset = {}", pdev->pdev_id(), dev_offset); + COUNTER_INCREMENT(m_metrics, vdev_write_count, 1); if (sisl_unlikely(!hs_utils::mod_aligned_sz(dev_offset, pdev->align_size()))) { COUNTER_INCREMENT(m_metrics, unalign_writes, 1); diff 
--git a/src/lib/homestore.cpp b/src/lib/homestore.cpp index af2d521c5..b0bd2ff54 100644 --- a/src/lib/homestore.cpp +++ b/src/lib/homestore.cpp @@ -321,8 +321,6 @@ void HomeStore::shutdown() { #ifdef _PRERELEASE flip::Flip::instance().stop_rpc_server(); #endif - - HomeStore::reset_instance(); LOGINFO("Homestore is completed its shutdown"); } diff --git a/src/lib/index/README.md b/src/lib/index/README.md index db24c9fbb..cb1805e79 100644 --- a/src/lib/index/README.md +++ b/src/lib/index/README.md @@ -91,7 +91,14 @@ Merge node happens somewhat similar to split node with respect to the atomicity Homestore Index service creates 2 more new nodes to replace C30, C40 lets say C30' and C40' respectively and now C20 is linked to C30' and C40' and C30, C40 nodes are deleted. -With this in perspective, dependency graph will link similar to split nodes with new nodes linked to left child, which is linked to parent node. +We can notice that we have to choose 1 of the 2 node sets, `(C20, C30, C40)` or `(C20, C30', C40')` as the current +state. **Whether we use the new nodes or the old ones completely depends on the persistence of leftmost child C20.** + +With this in perspective, dependency graph will link similar to split nodes with new nodes/freed nodes linked to left +child, which is linked to parent node. 
+ +(NOTE: logically, the release of freed nodes should depend on the leftmost child - but in practice we do it in a simpler way, +since it is very unlikely that freed nodes are overwritten within the same cp) ![Child_Node_Merge](../../../docs/imgs/Child_Node_Merge_1.png) @@ -100,4 +107,4 @@ The journal entries will need to record the existing node deletion information a ``` { , , , } {P10, C20, [C30', C40'], [C30, C40]} -``` \ No newline at end of file +``` diff --git a/src/lib/index/index_cp.cpp b/src/lib/index/index_cp.cpp index 955bd523f..a2e6ed72d 100644 --- a/src/lib/index/index_cp.cpp +++ b/src/lib/index/index_cp.cpp @@ -249,7 +249,6 @@ void IndexCPContext::process_txn_record(txn_record const* rec, std::map< BlkId, } if (up_buf) { - DEBUG_ASSERT(((buf->m_up_buffer == nullptr) || (buf->m_up_buffer == up_buf)), "Inconsistent up buffer"); auto real_up_buf = (up_buf->m_created_cp_id == cpg->id()) ? up_buf->m_up_buffer : up_buf; #ifndef NDEBUG @@ -267,6 +266,20 @@ void IndexCPContext::process_txn_record(txn_record const* rec, std::map< BlkId, #endif if (buf->m_up_buffer != real_up_buf) { + if (buf->m_up_buffer) { + buf->m_up_buffer->m_wait_for_down_buffers.decrement(1); +#ifndef NDEBUG + bool found{false}; + for (auto it = buf->m_up_buffer->m_down_buffers.begin(); it != buf->m_up_buffer->m_down_buffers.end(); ++it) { + if (it->lock() == buf) { + buf->m_up_buffer->m_down_buffers.erase(it); + found = true; + break; + } + } + HS_DBG_ASSERT(found, "Down buffer is linked to Up buf, but up_buf doesn't have down_buf in its list"); +#endif + } real_up_buf->m_wait_for_down_buffers.increment(1); buf->m_up_buffer = real_up_buf; } diff --git a/src/lib/index/wb_cache.cpp b/src/lib/index/wb_cache.cpp index 1b7523363..6a20341c6 100644 --- a/src/lib/index/wb_cache.cpp +++ b/src/lib/index/wb_cache.cpp @@ -213,12 +213,12 @@ static void set_crash_flips(IndexBufferPtr const& parent_buf, IndexBufferPtr con IndexBufferPtrList const& new_node_bufs, IndexBufferPtrList const& 
freed_node_bufs) { // TODO: Need an API from flip to quickly check if flip is enabled, so this method doesn't check flip_enabled a // bunch of times. - if (parent_buf && parent_buf->is_meta_buf()) { + if (parent_buf == nullptr && child_buf->is_meta_buf()) { // Split or merge happening on root if (iomgr_flip::instance()->test_flip("crash_flush_on_meta")) { - parent_buf->set_crash_flag(); - } else if (iomgr_flip::instance()->test_flip("crash_flush_on_root")) { child_buf->set_crash_flag(); + } else if (iomgr_flip::instance()->test_flip("crash_flush_on_root")) { + new_node_bufs[0]->set_crash_flag(); } } else if ((new_node_bufs.size() == 1) && freed_node_bufs.empty()) { // Its a split node situation @@ -237,6 +237,8 @@ static void set_crash_flips(IndexBufferPtr const& parent_buf, IndexBufferPtr con child_buf->set_crash_flag(); } else if (iomgr_flip::instance()->test_flip("crash_flush_on_merge_at_right_child")) { if (!new_node_bufs.empty()) { new_node_bufs[0]->set_crash_flag(); } + } else if (iomgr_flip::instance()->test_flip("crash_flush_on_freed_child")) { + freed_node_bufs[0]->set_crash_flag(); } } else if (!freed_node_bufs.empty() && (new_node_bufs.size() == freed_node_bufs.size())) { // Its a rebalance node situation @@ -246,6 +248,8 @@ static void set_crash_flips(IndexBufferPtr const& parent_buf, IndexBufferPtr con child_buf->set_crash_flag(); } else if (iomgr_flip::instance()->test_flip("crash_flush_on_rebalance_at_right_child")) { if (!new_node_bufs.empty()) { new_node_bufs[0]->set_crash_flag(); } + } else if (iomgr_flip::instance()->test_flip("crash_flush_on_freed_child")) { + freed_node_bufs[0]->set_crash_flag(); } } } @@ -265,15 +269,8 @@ void IndexWBCache::transact_bufs(uint32_t index_ordinal, IndexBufferPtr const& p link_buf(child_buf, buf, true /* is_sibling_link */, cp_ctx); } - for (auto const& buf : freed_node_bufs) { - if (!buf->m_wait_for_down_buffers.testz()) { - // This buffer has some down bufs depending on it. 
It can happen for an upper level interior node, where - // lower level node (say leaf) has split causing it to write entries in this node, but this node is now - // merging with other node, causing it to free. In these rare instances, we link this node to the new - // node resulting in waiting for all the down bufs to be flushed before up buf can flush (this buf is - // not written anyways) - link_buf(child_buf, buf, true /* is_sibling_link */, cp_ctx); - } + for (auto const &buf: freed_node_bufs) { + link_buf(child_buf, buf, true /* is_sibling_link */, cp_ctx); } if (new_node_bufs.empty() && freed_node_bufs.empty()) { @@ -286,10 +283,9 @@ void IndexWBCache::transact_bufs(uint32_t index_ordinal, IndexBufferPtr const& p } else { icp_ctx->add_to_txn_journal(index_ordinal, // Ordinal child_buf->m_up_buffer, // real up buffer - new_node_bufs.empty() ? freed_node_bufs[0]->m_up_buffer - : new_node_bufs[0]->m_up_buffer, // real in place child - new_node_bufs, // new node bufs - freed_node_bufs // free_node_bufs + child_buf, // real in place child + new_node_bufs, // new node bufs + freed_node_bufs // free_node_bufs ); } #if 0 @@ -371,6 +367,21 @@ void IndexWBCache::link_buf(IndexBufferPtr const& up_buf, IndexBufferPtr const& } // Now we link the down_buffer to the real up_buffer + if (down_buf->m_up_buffer) { + // release existing up_buffer's wait count + down_buf->m_up_buffer->m_wait_for_down_buffers.decrement(); +#ifndef NDEBUG + bool found{false}; + for (auto it = down_buf->m_up_buffer->m_down_buffers.begin(); it != down_buf->m_up_buffer->m_down_buffers.end(); ++it) { + if (it->lock() == down_buf) { + down_buf->m_up_buffer->m_down_buffers.erase(it); + found = true; + break; + } + } + HS_DBG_ASSERT(found, "Down buffer is linked to Up buf, but up_buf doesn't have down_buf in its list"); +#endif + } real_up_buf->m_wait_for_down_buffers.increment(1); down_buf->m_up_buffer = real_up_buf; #ifndef NDEBUG @@ -384,9 +395,13 @@ void IndexWBCache::free_buf(const IndexBufferPtr& 
buf, CPContext* cp_ctx) { bool done = m_cache.remove(buf->m_blkid, node); HS_REL_ASSERT_EQ(done, true, "Race on cache removal of btree blkid?"); } - + buf->m_node_freed = true; resource_mgr().inc_free_blk(m_node_size); - m_vdev->free_blk(buf->m_blkid, s_cast< VDevCPContext* >(cp_ctx)); + if (buf->is_clean()) { + buf->set_state(index_buf_state_t::DIRTY); + r_cast(cp_ctx)->add_to_dirty_list(buf); + resource_mgr().inc_dirty_buf_size(m_node_size); + } } //////////////////// Recovery Related section ///////////////////////////////// @@ -418,31 +433,52 @@ void IndexWBCache::recover(sisl::byte_view sb) { // This has to be done before doing any repair, because repair can allocate blkids and we don't want to allocate // the same blkid which could clash with the blkid next in the buf list. // - // On the second pass, we only take the new nodes/bufs and then repair their up buffers, if needed. - std::vector< IndexBufferPtr > l0_bufs; + // On the second pass, we only take part of the parents/siblings and then repair them, if needed. + std::vector pending_bufs; + std::vector deleted_bufs; for (auto const& [_, buf] : bufs) { - if (buf->m_node_freed || (buf->m_created_cp_id == icp_ctx->id())) { + if (buf->m_node_freed) { + // Freed node + if (buf->m_bytes == nullptr) { + buf->m_bytes = hs_utils::iobuf_alloc(m_node_size, sisl::buftag::btree_node, m_vdev->align_size()); + m_vdev->sync_read(r_cast(buf->m_bytes), m_node_size, buf->blkid()); + } + if (was_node_committed(buf)) { - if (was_node_committed(buf->m_up_buffer)) { - if (buf->m_node_freed) { - // Up buffer was written, so this buffer can be freed and thus can free the blk. 
- m_vdev->free_blk(buf->m_blkid, s_cast< VDevCPContext* >(icp_ctx)); - } else { - m_vdev->commit_blk(buf->m_blkid); - } - l0_bufs.push_back(buf); - } else { - buf->m_up_buffer->m_wait_for_down_buffers.decrement(); + // Mark this buffer as deleted, so that we can avoid using it anymore when repairing its parent's link + r_cast(buf->m_bytes)->node_deleted = true; + write_buf(nullptr, buf, icp_ctx); + deleted_bufs.push_back(buf); + pending_bufs.push_back(buf->m_up_buffer); + } else { + // (Up) buffer is not committed, node need to be kept and (potentially) repaired later + buf->m_node_freed = false; + buf->m_dirtied_cp_id = BtreeNode::get_modified_cp_id(buf->m_bytes); + if (buf->m_created_cp_id == icp_ctx->id()) { + // New nodes need to be commited first + m_vdev->commit_blk(buf->m_blkid); } + pending_bufs.push_back(buf); + buf->m_wait_for_down_buffers.increment(1); // Purely for recover_buf() counter consistency + } + } else if (buf->m_created_cp_id == icp_ctx->id()) { + // New node + if (was_node_committed(buf) && was_node_committed(buf->m_up_buffer)) { + // Both current and up buffer is commited, we can safely commit the current block + m_vdev->commit_blk(buf->m_blkid); + pending_bufs.push_back(buf->m_up_buffer); + } else { + // Just ignore it + buf->m_up_buffer->m_wait_for_down_buffers.decrement(); } } } LOGINFOMOD(wbcache, "Index Recovery detected {} nodes out of {} as new/freed nodes to be recovered in prev cp={}", - l0_bufs.size(), bufs.size(), icp_ctx->id()); + pending_bufs.size(), bufs.size(), icp_ctx->id()); - auto detailed_log = [this](std::map< BlkId, IndexBufferPtr > const& bufs, - std::vector< IndexBufferPtr > const& l0_bufs) { + auto detailed_log = [this](std::map const &bufs, + std::vector const &pending_bufs) { // Logs to detect down_waits are set correctly for up buffers list of all recovered bufs std::string log = fmt::format("\trecovered bufs (#of bufs = {})\n", bufs.size()); for (auto const& [_, buf] : bufs) { @@ -450,20 +486,26 @@ void 
IndexWBCache::recover(sisl::byte_view sb) { } // list of new_bufs - fmt::format_to(std::back_inserter(log), "\n\tl0_bufs (#of bufs = {})\n", l0_bufs.size()); - for (auto const& buf : l0_bufs) { + fmt::format_to(std::back_inserter(log), "\n\tpending_bufs (#of bufs = {})\n", pending_bufs.size()); + for (auto const &buf: pending_bufs) { fmt::format_to(std::back_inserter(log), "{}\n", buf->to_string()); } return log; }; - LOGTRACEMOD(wbcache, "All unclean bufs list\n{}", detailed_log(bufs, l0_bufs)); + LOGTRACEMOD(wbcache, "All unclean bufs list\n{}", detailed_log(bufs, pending_bufs)); + + for (auto const &buf: pending_bufs) { + recover_buf(buf); + if (buf->m_bytes != nullptr && r_cast(buf->m_bytes)->node_deleted) { + // This buffer was marked as deleted during repair, so we also need to free it + deleted_bufs.push_back(buf); + } + } - // Second iteration we start from the lowest levels (which are all new_bufs) and check if up_buffers need to be - // repaired. All L1 buffers are not needed to repair, because they are sibling nodes and so we pass false in - // do_repair flag. 
- for (auto const& buf : l0_bufs) { - recover_buf(buf->m_up_buffer); + for (auto const &buf: deleted_bufs) { + m_vdev->free_blk(buf->m_blkid, s_cast(icp_ctx)); } + m_in_recovery = false; m_vdev->recovery_completed(); } @@ -556,17 +598,18 @@ folly::Future< bool > IndexWBCache::async_cp_flush(IndexCPContext* cp_ctx) { void IndexWBCache::do_flush_one_buf(IndexCPContext* cp_ctx, IndexBufferPtr const& buf, bool part_of_batch) { #ifdef _PRERELEASE + if (hs()->crash_simulator().is_crashed()) { + LOGINFOMOD(wbcache, "crash simulation is ongoing, aid simulation by not flushing"); + return; + } if (buf->m_crash_flag_on) { -// std::string filename = "crash_buf_" + std::to_string(cp_ctx->id()) + ".dot"; -// LOGINFOMOD(wbcache, "Simulating crash while writing buffer {}, stored in file {}", buf->to_string(), filename); -// cp_ctx->to_string_dot(filename); + // std::string filename = "crash_buf_" + std::to_string(cp_ctx->id()) + ".dot"; + // LOGINFOMOD(wbcache, "Simulating crash while writing buffer {}, stored in file {}", buf->to_string(), filename); + // cp_ctx->to_string_dot(filename); LOGINFOMOD(wbcache, "Simulating crash while writing buffer {}", buf->to_string()); hs()->crash_simulator().crash(); cp_ctx->complete(true); return; - } else if (hs()->crash_simulator().is_crashed()) { - LOGINFOMOD(wbcache, "crash simulation is ongoing, aid simulation by not flushing"); - return; } #endif @@ -574,17 +617,19 @@ void IndexWBCache::do_flush_one_buf(IndexCPContext* cp_ctx, IndexBufferPtr const buf->set_state(index_buf_state_t::FLUSHING); if (buf->is_meta_buf()) { - LOGTRACEMOD(wbcache, "flushing cp {} meta buf {} possibly because of root split", cp_ctx->id(), + LOGTRACEMOD(wbcache, "Flushing cp {} meta buf {} possibly because of root split", cp_ctx->id(), buf->to_string()); auto const& sb = r_cast< MetaIndexBuffer* >(buf.get())->m_sb; - meta_service().update_sub_sb(buf->m_bytes, sb.size(), sb.meta_blk()); + if (!sb.is_empty()) { + meta_service().update_sub_sb(buf->m_bytes, sb.size(), 
sb.meta_blk()); + } process_write_completion(cp_ctx, buf); } else if (buf->m_node_freed) { - LOGTRACEMOD(wbcache, "Not flushing buf {} as it was freed, its here for merely dependency", cp_ctx->id(), - buf->to_string()); + LOGTRACEMOD(wbcache, "Not flushing cp {} buf {} as it was freed", cp_ctx->id(), buf->to_string()); + m_vdev->free_blk(buf->m_blkid, cp_ctx); process_write_completion(cp_ctx, buf); } else { - LOGTRACEMOD(wbcache, "flushing cp {} buf {} info: {}", cp_ctx->id(), buf->to_string(), + LOGTRACEMOD(wbcache, "Flushing cp {} buf {} info: {}", cp_ctx->id(), buf->to_string(), BtreeNode::to_string_buf(buf->raw_buffer())); m_vdev->async_write(r_cast< const char* >(buf->raw_buffer()), m_node_size, buf->m_blkid, part_of_batch) .thenValue([buf, cp_ctx](auto) { @@ -685,7 +730,7 @@ void IndexWBCache::get_next_bufs_internal(IndexCPContext* cp_ctx, uint32_t max_c std::optional< IndexBufferPtr > buf = cp_ctx->next_dirty(); if (!buf) { break; } // End of list - if ((*buf)->m_wait_for_down_buffers.testz()) { + if ((*buf)->state() == index_buf_state_t::DIRTY && (*buf)->m_wait_for_down_buffers.testz()) { bufs.emplace_back(std::move(*buf)); ++count; } else { diff --git a/src/tests/test_common/homestore_test_common.hpp b/src/tests/test_common/homestore_test_common.hpp index 174039495..b01aff3f2 100644 --- a/src/tests/test_common/homestore_test_common.hpp +++ b/src/tests/test_common/homestore_test_common.hpp @@ -198,8 +198,8 @@ class HSTestHelper { } homestore::HomeStore::instance()->shutdown(); + iomanager.stop(); // Stop iomanager first in case any fiber is still referencing homestore resources homestore::HomeStore::reset_instance(); - iomanager.stop(); if (cleanup) { remove_files(m_generated_devs); @@ -251,6 +251,11 @@ class HSTestHelper { m_fc.inject_delay_flip(flip_name, {null_cond}, freq, delay_usec); LOGDEBUG("Flip {} set", flip_name); } + + void remove_flip(const std::string flip_name) { + m_fc.remove_flip(flip_name); + LOGDEBUG("Flip {} removed", flip_name); + } 
#endif static void fill_data_buf(uint8_t* buf, uint64_t size, uint64_t pattern = 0) { diff --git a/src/tests/test_index_crash_recovery.cpp b/src/tests/test_index_crash_recovery.cpp index 11235be6a..9f447e7c4 100644 --- a/src/tests/test_index_crash_recovery.cpp +++ b/src/tests/test_index_crash_recovery.cpp @@ -36,23 +36,25 @@ SISL_LOGGING_DECL(test_index_crash_recovery) SISL_OPTION_GROUP(test_index_crash_recovery, (num_iters, "", "num_iters", "number of iterations for rand ops", - ::cxxopts::value< uint32_t >()->default_value("500"), "number"), + ::cxxopts::value< uint32_t >()->default_value("500"), "number"), (num_entries, "", "num_entries", "number of entries to test with", - ::cxxopts::value< uint32_t >()->default_value("5000"), "number"), + ::cxxopts::value< uint32_t >()->default_value("5000"), "number"), (run_time, "", "run_time", "run time for io", ::cxxopts::value< uint32_t >()->default_value("360000"), - "seconds"), + "seconds"), (max_keys_in_node, "", "max_keys_in_node", "max_keys_in_node", - ::cxxopts::value< uint32_t >()->default_value("0"), ""), + ::cxxopts::value< uint32_t >()->default_value("20"), ""), + (min_keys_in_node, "", "min_keys_in_node", "min_keys_in_node", + ::cxxopts::value< uint32_t >()->default_value("6"), ""), (operation_list, "", "operation_list", - "operation list instead of default created following by percentage", - ::cxxopts::value< std::vector< std::string > >(), "operations [...]"), + "operation list instead of default created following by percentage", + ::cxxopts::value< std::vector< std::string > >(), "operations [...]"), (preload_size, "", "preload_size", "number of entries to preload tree with", - ::cxxopts::value< uint32_t >()->default_value("1000"), "number"), + ::cxxopts::value< uint32_t >()->default_value("1000"), "number"), (init_device, "", "init_device", "init device", ::cxxopts::value< bool >()->default_value("1"), ""), (cleanup_after_shutdown, "", "cleanup_after_shutdown", "cleanup after shutdown", - ::cxxopts::value< 
bool >()->default_value("1"), ""), + ::cxxopts::value< bool >()->default_value("1"), ""), (seed, "", "seed", "random engine seed, use random if not defined", - ::cxxopts::value< uint64_t >()->default_value("0"), "number")) + ::cxxopts::value< uint64_t >()->default_value("0"), "number")) void log_obj_life_counter() { std::string str; @@ -96,10 +98,16 @@ class SequenceGenerator { keyDist_ = std::uniform_int_distribution<>(start_range_, end_range_); } + void fillRange(uint64_t start, uint64_t end) { + for (uint64_t i = start; i <= end; ++i) { + keyStates[i] = true; + } + } + OperationList generateOperations(size_t numOperations, bool reset = false) { std::vector< Operation > operations; if (reset) { this->reset(); } - for (size_t i = 0; i < numOperations; ++i) { + while (operations.size() < numOperations) { uint32_t key = keyDist_(gen_); auto [it, inserted] = keyStates.try_emplace(key, false); auto& inUse = it->second; @@ -117,6 +125,7 @@ class SequenceGenerator { return operations; } + __attribute__((noinline)) std::string showKeyState(uint64_t key) const { auto it = keyStates.find(key); if (it != keyStates.end()) { return it->second ? 
"Put" : "Remove"; } @@ -131,6 +140,7 @@ class SequenceGenerator { } return occurrences; } + __attribute__((noinline)) std::string printOperations(const OperationList& operations) const { std::ostringstream oss; for (const auto& [key, opType] : operations) { @@ -139,6 +149,7 @@ class SequenceGenerator { } return oss.str(); } + __attribute__((noinline)) std::string printKeysOccurrences(const OperationList& operations) const { std::set< uint64_t > keys = collectUniqueKeys(operations); std::ostringstream oss; @@ -152,6 +163,7 @@ class SequenceGenerator { } return oss.str(); } + __attribute__((noinline)) std::string printKeyOccurrences(const OperationList& operations, uint64_t key ) const { std::ostringstream oss; auto keyOccurrences = inspect(operations, key); @@ -162,6 +174,7 @@ class SequenceGenerator { } return oss.str(); } + void reset() { keyStates.clear(); } private: @@ -204,6 +217,7 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT m_test->m_cfg.m_leaf_node_type = T::leaf_node_type; m_test->m_cfg.m_int_node_type = T::interior_node_type; m_test->m_cfg.m_max_keys_in_node = SISL_OPTIONS["max_keys_in_node"].as< uint32_t >(); + m_test->m_cfg.m_min_keys_in_node = SISL_OPTIONS["min_keys_in_node"].as(); m_test->m_bt = std::make_shared< typename T::BtreeType >(std::move(sb), m_test->m_cfg); return m_test->m_bt; } @@ -232,6 +246,7 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT LOGINFO("Node size {} ", hs()->index_service().node_size()); this->m_cfg = BtreeConfig(hs()->index_service().node_size()); this->m_cfg.m_max_keys_in_node = SISL_OPTIONS["max_keys_in_node"].as< uint32_t >(); + this->m_cfg.m_min_keys_in_node = SISL_OPTIONS["min_keys_in_node"].as(); auto uuid = boost::uuids::random_generator()(); auto parent_uuid = boost::uuids::random_generator()(); @@ -257,7 +272,10 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT } void reset_btree() { + 
hs()->index_service().remove_index_table(this->m_bt); this->m_bt->destroy(); + this->trigger_cp(true); + auto uuid = boost::uuids::random_generator()(); auto parent_uuid = boost::uuids::random_generator()(); this->m_bt = std::make_shared< typename T::BtreeType >(uuid, parent_uuid, 0, this->m_cfg); @@ -274,7 +292,7 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT void reapply_after_crash() { ShadowMap< K, V > snapshot_map{this->m_shadow_map.max_keys()}; snapshot_map.load(m_shadow_filename); - LOGDEBUG("\tSnapshot before crash\n{}", snapshot_map.to_string()); + // LOGDEBUG("\tSnapshot before crash\n{}", snapshot_map.to_string()); auto diff = this->m_shadow_map.diff(snapshot_map); // visualize tree after crash @@ -286,16 +304,23 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT for (const auto& [k, addition] : diff) { dif_str += fmt::format(" {} \t{}\n", k.key(), addition); } - LOGDEBUG("Diff between shadow map and snapshot map\n{}\n", dif_str); + // LOGDEBUG("Diff between shadow map and snapshot map\n{}\n", dif_str); - for (const auto& [k, addition] : diff) { + for (const auto &[k, addition]: diff) { // this->print_keys(fmt::format("reapply: before inserting key {}", k.key())); // this->visualize_keys(recovered_tree_filename); - if (addition) { this->force_upsert(k.key()); } + if (addition) { + LOGDEBUG("Reapply: Inserting key {}", k.key()); + this->force_upsert(k.key()); + } else { + LOGDEBUG("Reapply: Removing key {}", k.key()); + this->remove_one(k.key(), false); + } } - test_common::HSTestHelper::trigger_cp(true); + trigger_cp(true); this->m_shadow_map.save(m_shadow_filename); } + void reapply_after_crash(OperationList& operations) { for (const auto& [key, opType] : operations) { switch (opType) { @@ -309,7 +334,7 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT break; } } - test_common::HSTestHelper::trigger_cp(true); + trigger_cp(true); } void TearDown() 
override { @@ -331,14 +356,15 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT } void crash_and_recover(uint32_t s_key, uint32_t e_key) { - this->print_keys("Btree prior to CP and susbsequent simulated crash: "); - test_common::HSTestHelper::trigger_cp(false); + // this->print_keys("Btree prior to CP and susbsequent simulated crash: "); + trigger_cp(false); this->wait_for_crash_recovery(); // this->visualize_keys("tree_after_crash_" + std::to_string(s_key) + "_" + std::to_string(e_key) + ".dot"); - this->print_keys("Post crash and recovery, btree structure: "); + // this->print_keys("Post crash and recovery, btree structure: "); this->reapply_after_crash(); + // Verification this->get_all(); LOGINFO("Expect to have [{},{}) in tree and it is actually{} ", s_key, e_key, tree_key_count()); ASSERT_EQ(this->m_shadow_map.size(), this->m_bt->count_keys(this->m_bt->root_node_id())) @@ -346,10 +372,10 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT } void crash_and_recover(OperationList& operations, std::string filename = "") { - // this->print_keys("Btree prior to CP and susbsequent simulated crash: "); - test_common::HSTestHelper::trigger_cp(false); + // this->print_keys("Btree prior to CP and susbsequent simulated crash: "); + trigger_cp(false); this->wait_for_crash_recovery(); - // this->print_keys("Post crash and recovery, btree structure:"); + // this->print_keys("Post crash and recovery, btree structure:"); if (!filename.empty()) { LOGINFO("Visualize the tree file {}", filename); @@ -364,6 +390,7 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT this->visualize_keys("after_reapply__" + filename); } + // Verification this->get_all(); } @@ -442,82 +469,6 @@ TYPED_TEST(IndexCrashTest, SplitOnLeftEdge) { this->query_all_paginate(80); } -/* -TYPED_TEST(IndexCrashTest, ManualMergeCrash){ - // Define the lambda function - const uint32_t num_entries = 30; - - auto 
initTree = [this, num_entries]() { - for (uint64_t k = 0u; k < num_entries; ++k) { - this->force_upsert(k); - } - test_common::HSTestHelper::trigger_cp(true); - this->m_shadow_map.save(this->m_shadow_filename); - }; - - std::vector< OperationList > removing_scenarios = { - {{29, OperationType::Remove}, - {28, OperationType::Remove}, - {27, OperationType::Remove}, - {26, OperationType::Remove}, - {25, OperationType::Remove}, - {24, OperationType::Remove}} - }; - - auto scenario = removing_scenarios[0]; - - LOGINFO("Step 1-1: Populate some keys and flush"); - initTree(); - this->visualize_keys("tree_init.dot"); - LOGINFO("Step 2-1: Set crash flag, remove some keys in reverse order"); - this->set_basic_flip("crash_flush_on_merge_at_parent"); - - for (auto [k, _] : scenario) { - LOGINFO("\n\n\t\t\t\t\t\t\t\t\t\t\t\t\tRemoving entry {}", k); - this->remove_one(k); - } - this->visualize_keys("tree_before_crash.dot"); - - LOGINFO("Step 3-1: Trigger cp to crash"); - this->crash_and_recover(scenario, "recover_tree_crash_1.dot"); - test_common::HSTestHelper::trigger_cp(true); - this->get_all(); - - LOGINFO("Step 1-2: Populate some keys and flush"); - initTree(); - this->visualize_keys("tree_init_02.dot"); - LOGINFO("Step 2-2: Set crash flag, remove some keys in reverse order"); - this->set_basic_flip("crash_flush_on_merge_at_left_child"); - for (auto [k, _] : scenario) { - LOGINFO("\n\n\t\t\t\t\t\t\t\t\t\t\t\t\tRemoving entry {}", k); - this->remove_one(k); - } - this->visualize_keys("tree_before_crash_2.dot"); - - LOGINFO("Step 3-2: Trigger cp to crash"); - this->crash_and_recover(scenario, "recover_tree_crash_2.dot"); - test_common::HSTestHelper::trigger_cp(true); - this->get_all(); - - LOGINFO("Step 1-3: Populate some keys and flush"); - initTree(); - this->visualize_keys("tree_init_03.dot"); - LOGINFO("Step 2-3: Set crash flag, remove some keys in reverse order"); - this->set_basic_flip("crash_flush_on_freed_child"); - for (auto [k, _] : scenario) { - 
LOGINFO("\n\n\t\t\t\t\t\t\t\t\t\t\t\t\tRemoving entry {}", k); - this->remove_one(k); - } - LOGINFO("Step 2-3: Set crash flag, remove some keys in reverse order"); - this->visualize_keys("tree_before_crash_3.dot"); - - LOGINFO("Step 3-3: Trigger cp to crash"); - this->crash_and_recover(scenario, "recover_tree_crash_3.dot"); - test_common::HSTestHelper::trigger_cp(true); - this->get_all(); -} -*/ - TYPED_TEST(IndexCrashTest, SplitCrash1) { // Define the lambda function auto const num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >(); @@ -583,6 +534,215 @@ TYPED_TEST(IndexCrashTest, long_running_put_crash) { } } + +// Basic reverse and forward order remove with different flip points +TYPED_TEST(IndexCrashTest, MergeRemoveBasic) { + vector flip_points = { + "crash_flush_on_merge_at_parent", + "crash_flush_on_merge_at_left_child", + "crash_flush_on_freed_child", + }; + + for (size_t i = 0; i < flip_points.size(); ++i) { + this->reset_btree(); + + auto &flip_point = flip_points[i]; + LOGINFO("=== Testing flip point: {} - {} ===", i + 1, flip_point); + + // Populate some keys [0,num_entries) and trigger cp to persist + LOGINFO("Step {}-1: Populate some keys and flush", i+1); + auto const num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >(); + for (auto k = 0u; k < num_entries; ++k) { + this->put(k, btree_put_type::INSERT, true /* expect_success */); + } + test_common::HSTestHelper::trigger_cp(true); + this->m_shadow_map.save(this->m_shadow_filename); + + this->visualize_keys("tree_merge_full.dot"); + + // Split keys into batches and remove the last one in reverse order + LOGINFO("Step {}-2: Set crash flag, remove some keys in reverse order", i + 1); + int batch_num = 4; + { + int n = batch_num; + auto r = num_entries * n / batch_num - 1; + auto l = num_entries * (n - 1) / batch_num; + OperationList ops; + for (auto k = r; k >= l; --k) { + ops.emplace_back(k, OperationType::Remove); + } + LOGINFO("Step {}-2-1: Remove keys in batch {}/{} ({} to {})", i + 1, n, 
batch_num, r, l); + + this->set_basic_flip(flip_point); + for (auto k = r; k >= l; --k) { + LOGINFO("Removing key {}", k); + this->remove_one(k); + } + this->visualize_keys("tree_merge_before_first_crash.dot"); + + LOGINFO("Step {}-2-2: Trigger cp to crash", i + 1); + this->crash_and_recover(ops); + } + + // Remove the next batch of keys in forward order + LOGINFO("Step {}-3: Remove another batch in ascending order", i + 1) { + int n = batch_num - 1; + auto r = num_entries * n / batch_num - 1; + auto l = num_entries * (n - 1) / batch_num; + OperationList ops; + for (auto k = l; k <= r; ++k) { + ops.emplace_back(k, OperationType::Remove); + } + LOGINFO("Step {}-3-1: Remove keys in batch {}/{} ({} to {})", i + 1, n, batch_num, l, r); + + this->set_basic_flip(flip_point); + for (auto k = l; k <= r; ++k) { + LOGINFO("Removing key {}", k); + this->remove_one(k); + } + this->visualize_keys("tree_merge_before_second_crash.dot"); + + LOGINFO("Step {}-3-2: Trigger cp to crash", i + 1); + this->crash_and_recover(ops); + } + + LOGINFO("Step {}-4: Cleanup the tree", i + 1); + for (auto k = 0u; k < num_entries; ++k) { + this->remove_one(k, false); + } + test_common::HSTestHelper::trigger_cp(true); + this->get_all(); + } +} +// +// TYPED_TEST(IndexCrashTest, MergeCrash1) { +// auto const num_entries = SISL_OPTIONS["num_entries"].as<uint32_t>(); +// vector<std::string> flips = { +// "crash_flush_on_merge_at_parent", "crash_flush_on_merge_at_left_child", "crash_flush_on_freed_child" +// }; +// SequenceGenerator generator(0 /*putFreq*/, 100 /* removeFreq*/, 0 /*start_range*/, num_entries - 1 /*end_range*/); +// OperationList operations; +// for (size_t i = 0; i < flips.size(); ++i) { +// this->reset_btree(); +// LOGINFO("Step {}-1: Init btree", i + 1); +// for (auto k = 0u; k < num_entries; ++k) { +// this->put(k, btree_put_type::INSERT, true /* expect_success */); +// } +// test_common::HSTestHelper::trigger_cp(true); +// this->print_keys("Inited tree"); +// +// LOGINFO("Step {}-2: Set flag {}", i + 1, 
flips[i]); +// this->set_basic_flip(flips[i], 1, 10); +// generator.reset(); +// generator.fillRange(0, num_entries - 1); +// +// // Randomly remove some keys +// std::random_device rd; // Random device to seed the generator +// std::mt19937 gen(rd()); // Mersenne Twister engine +// std::uniform_int_distribution<> dis(num_entries / 4, num_entries / 2); +// auto num_keys_to_remove = dis(gen); +// LOGINFO("Removing {} keys before crash", num_keys_to_remove); +// operations = generator.generateOperations(num_keys_to_remove, false /* reset */); +// for (auto [k, _]: operations) { +// LOGINFO("Removing key {}", k); +// this->remove_one(k, true); +// } +// +// LOGINFO("Step {}-3: Simulate crash and recover", i + 1); +// this->crash_and_recover(operations, fmt::format("recover_tree_crash_{}.dot", i + 1)); +// } +// } +// +// TYPED_TEST(IndexCrashTest, MergeManualCrash) { +// std::vector<std::string> flip_points = { +// "crash_flush_on_merge_at_parent", +// "crash_flush_on_merge_at_left_child", +// "crash_flush_on_freed_child", +// }; +// +// constexpr uint32_t num_entries = 28; // with max=5 & min=3 +// +// auto initTree = [this, num_entries]() { +// for (auto k = 0u; k < num_entries; ++k) { +// this->put(k, btree_put_type::INSERT, true /* expect_success */); +// } +// test_common::HSTestHelper::trigger_cp(true); +// this->m_shadow_map.save(this->m_shadow_filename); +// }; +// +// std::vector<OperationList> removing_scenarios = { +// { +// {27, OperationType::Remove}, +// {26, OperationType::Remove}, +// {25, OperationType::Remove}, +// {24, OperationType::Remove}, +// {23, OperationType::Remove}, +// {22, OperationType::Remove}, +// }, // Merge 2 rightmost leaf nodes in 1 action +// { +// {27, OperationType::Remove}, +// {26, OperationType::Remove}, +// {25, OperationType::Remove}, +// {24, OperationType::Remove}, +// {23, OperationType::Remove}, +// {20, OperationType::Remove}, +// {19, OperationType::Remove}, +// }, // Merge 3 rightmost leaf nodes in 1 action +// { +// {27, 
OperationType::Remove}, +// {26, OperationType::Remove}, +// {25, OperationType::Remove}, +// {24, OperationType::Remove}, +// {23, OperationType::Remove}, +// {22, OperationType::Remove}, +// {21, OperationType::Remove}, +// {20, OperationType::Remove}, +// {19, OperationType::Remove}, +// }, // Merge 3 rightmost leaf nodes in 2 actions +// { +// {23, OperationType::Remove}, +// {22, OperationType::Remove}, +// {11, OperationType::Remove}, +// {10, OperationType::Remove}, +// {13, OperationType::Remove}, +// }, // Merge from level=0 then level=1 +// // { +// // {16, OperationType::Remove}, +// // }, // Merge from level=1 then level=0 - need to set min=4 +// }; +// +// for (int i = 0; i < static_cast<int>(removing_scenarios.size()); i++) { +// auto scenario = removing_scenarios[i]; +// auto s_idx = i + 1; +// LOGINFO("\n\tTesting scenario {}", s_idx); +// for (int j = 0; j < static_cast<int>(flip_points.size()); j++) { +// const auto &flip_point = flip_points[j]; +// auto f_idx = j + 1; +// LOGINFO("\n\t\t\t\tTesting flip point: {}", flip_point); +// +// LOGINFO("Step {}-{}-1: Populate keys and flush", s_idx, f_idx); +// initTree(); +// this->visualize_keys(fmt::format("tree_init.{}_{}.dot", s_idx, f_idx)); +// +// LOGINFO("Step {}-{}-2: Set crash flag, remove keys in reverse order", s_idx, f_idx); +// this->set_basic_flip(flip_point); +// for (auto k: scenario) { +// LOGINFO("Removing entry {}", k.first); +// this->remove_one(k.first); +// } +// this->visualize_keys(fmt::format("tree_before_first_crash.{}_{}.dot", s_idx, f_idx)); +// this->remove_flip(flip_point); +// +// LOGINFO("Step {}-{}-3: Trigger cp to crash", s_idx, f_idx); +// this->crash_and_recover(scenario); +// test_common::HSTestHelper::trigger_cp(true); +// this->get_all(); +// +// this->reset_btree(); +// test_common::HSTestHelper::trigger_cp(true); +// } +// } +// } #endif int main(int argc, char* argv[]) {