From d5ca9b369681cbc0d40d5ce7b5558d3dd1588c86 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Thu, 30 May 2024 09:45:02 -0700 Subject: [PATCH] updated autocoderover results --- _posts/2024-05-22-swe-bench-lite.md | 3 +- assets/swe_bench_lite.jpg | Bin 37241 -> 37348 bytes assets/swe_bench_lite.svg | 458 +++++++++++++++------------- benchmark/swe-bench-lite.txt | 2 +- 4 files changed, 248 insertions(+), 215 deletions(-) diff --git a/_posts/2024-05-22-swe-bench-lite.md b/_posts/2024-05-22-swe-bench-lite.md index e3a3048e33d..cb89283fa2d 100644 --- a/_posts/2024-05-22-swe-bench-lite.md +++ b/_posts/2024-05-22-swe-bench-lite.md @@ -400,8 +400,9 @@ Below are the references for the SWE-Bench Lite results displayed in the graph at the top of this page. - [25.0% OpenDevin](https://x.com/gneubig/status/1791498953709752405) -- [22.3% AutoCodeRover](https://github.com/nus-apr/auto-code-rover) +- [19.0% AutoCodeRover](https://github.com/swe-bench/experiments/pull/11) - [20.3% Amazon Q Developer Agent (v20240430-dev)](https://www.swebench.com) - [18.0% SWE-Agent + GPT-4](https://www.swebench.com) - [11.7% SWE-Agent + Opus](https://www.swebench.com) +Note: Graph updated on 5/30/24 to accurately reflect AutoCodeRover's pass@1 results. \ No newline at end of file diff --git a/assets/swe_bench_lite.jpg b/assets/swe_bench_lite.jpg index 1b7d106f23ef444470a298e274b59ede644243e5..e695a94e3083c090d0164451c1904aa0c614474d 100644 GIT binary patch delta 13665 zcmbt)Wl&sAx9%W;1WgF;1PvMp?jga01|5RyFqy$UFi0T5WsuRrkm0y=zzRmQ~%WpMIY8Hw%3&8~qn2BYf!a z{k=A_!Td|-o4X1$v9E|5yWi`uG5A|1-9H1vT}Cxg4+S*obAiw9Wr1<-o+v)k%)n&dOsu-K7OLe z2{xtoe(E)I&@bBAr4cW_7m? z>78}nVt=A-9Y4>@uHqKYf}657yGE00yIi`EA-&WvmG=1edsPI)P z>%(y~f}7+pt}BlIBS2rl%F4-C>3CO~w#T$+8&#slk_rQRPzrW|J>sPiTT1W%9hL{c zC)OLO{J*+fb`bo8_7Rd;-nN%pEzJtCY=5oLIsNu5Ft&k7LB$?+UKa{g_%a)$0IJ$F zG@{Kyh8)7Kudgo#0ADoV46NnwNY?lN%gBke(pF6N09)RLk|R?2Cq?TPr|{=&oNx*@ zMpz6*t{Z)+`#Eumq}c@Vz|aFg`~d(IlDa*;1L1}2UP)1wY0g#Nwy>6Y1~|c-&WjD6 zU((15{qvgrPm#wk{>e?~QVJ;aPXTZto*%dynhgJ^@crNQGO#sN<`(DfZ2m1UltQdF z9{_1ERRmS~y~jN=G1FN40T3l|c<0sg0LVx>#W@?$NWXb?x%L2vCtSRP8(G5d7AN!M zQzNdTsx&Pg0B+~mtT!zP#a1rjM5*~Zo1Xi_U1|6i4ij<|DBobWLH_>J<)xAWGP*z@dLYkj+Y zFP?;ZgM8RUpeOX-86vdQJ*Ti$f$komQfYf6n};&e%}^gps%VxJYiMt;MPk~t?vgr zguFYDL`j`PZ{agbi6+Y+$oLPwyCbgYQSU_Whta4g~rD~qp%XY z!o0~KbHh<4>*%-_a>~5F(KC_boMi!2Vzu1pN1YTHtGfYO=L1Z;f)^J9Yz8j1iqW_Bh(#P9_f;0?WWB_kc_ zH}NpH(3kF!k>it$1xzEwo*e*DRJ$NnqjIJqJs-eEJ}66Hhb0;_jITh?;_iukQb%)g z-az)Y;3OBs;mNr0I3*#s$7wKl+P({ zHsCk)XszBpP;c@>c#isEOpzK_L@mwsd7xc^zIU~X>czoRB2CEN?Kb*>w2u|V{i#m~ zvSIRPWyeQOG?;G3$=v6~t$J!*P2l@NW%@Lm2r4t~{fvwG6kTmSNp9rG0Y3yaFg5ck zxUIt;fn0@`8~we=!0WsMWn@7sh2j(5$JK=_*97bikz_A5scvc=jlVTgU``Wq2WpI} zlt|mPo7K@+N$tV$)5=x{q-O_{8-F4qf100jJ`=u<y%Tq@VEFo%6x!VR4d)Lf4ORMXJ}A4j$>u1k<*1CHjRYeLYc>FLk|9 z6tUK4+HC#+@NN+m@4CP7aYnHR(`-mx&YouX!wzdUwN0C=Ya42jA?jlWkW(#;lLHFs zI}Y)y{0{~XfN~lcDKS{Wr2+Kt;GlEv#>!W)@!v)~BJVD4w~;aTEF;|MLN`Xg64Sh)fmw7|&K_mr)aATw5WGd2=VR(_i!E_fbw{ zP~wFgfYsrTM3Wysn$&-Oh~#|$G%uzKh5(Yt%-Q|hBN0G&lkO`Rp44;qyf*33FQqYt z)r;KIf)swjL+|~j(rwdqCk@onP@|}kGM-Xp5BuV8RJz4)Y5gm2U5Kq)6CBCMyJ??O zQAaN1d%}S=wT7;(4**p!5RX~N2SK>OQNJc5bHlq=fxg zrXyZFMq6=}6+t&Vx*9xo8|0Ju?_*em5!SL7G_mZ&Mr zF269OhEU7fwYu2e=PqIWRjYTqt{9Sfyh(xia(I>gth)r(S4{3AMP)Rr^vv%pFa(m1 z`I7O)P$?FuavR^yks)2Pwy~~0to@`f@C2gee}#k18aVVh%1e`Cc>6NwegOK-i!lmL ztE$Z6qjS-{NqcTvcPG`(H^dd0oJR$wI*cDRWi#{9?4^+Y>zEPUQ|kZIzx3UGu5IeS zfhN8!0)DSc0<)G{*@Y-wehisX^Vg42jQw1qS@Okflb)2Z485q$+QBVXIWwI=Fwv6# zW13AG%TtFx462HQ97S27PX-nA!S$(#h`0j1jwelx(a4VO6yX%PcW7FUeA0ssA=6EU zN`GCb*j5FD*U=!J6=9XpLm1HkkqFxnS^rH254#n7H<(JOLem&ywamy}*3le?KZzeR zu@gMv?yVthAx;0J+`<&YhAEZ8Q(7QhW7{G{R~?CC2w)v4!M(Gn2`V`Y^_E*u7C{w^M4!Rju?0~s3Ij_S(&89r@){$bEiOFT&0fo$pBU&r4*D`{Q%JOiIwn! z0<%ImDn;~&(O?{r$9Y^XT-e(6H6CYuA~{0rg)Jb6kg#y;%yt27OGvdqfTC>v~3%QwQfGVgm#*Vy0!h=YIQM z3vkVr1^@Pj2!4R~9QIF$DUMTxoeyZ!^u)Kj(6r+^L6`^zi(kbtlNSTLpRQ@eY7;pc zkyq4=x*QVC$_)Pca3r`*T}~0{z?AL?PP)-vyrqZa&6Cn67m)KRa(BtMx760$vW!jM76Yfya#;kdu?qcp}bVB*6i?CSB;v?exU=$ zJN<@>s~RH0cZiKK3tj9I7%B@o|4}tHoYjb=6yw!kz~3zSu$FO8yJkIx zT{%IaM)!7Lz^}YY(9eZ|)hTJ;9aCqz3W^15s)6A50{eaZI^}mkV^0T1D}0&li=K)5 zN>rxG*`^OD5)(yi}-o{&Dc6xBE@Qd?+1t{L{}l7S;7{ncpNV>S(D@y& z;{n?dX^Ly3NW0u;VU8;NW4}Pwsh2$r;usjL!vy{%%qbubi4o2drK0byPqitQye!~X zckCU+XrboIp&Yo~`HH1$3PRp#&n)MuDVFx>WaLFUAeHLxjL<4ag$dbVGBW;(00KpR zNEhb-KN|~E;EqmOmyUa6@FXIpl?5ooe8)^ z;GEu}o$X_HuBwccW+Zi~SE@8?`IYm(r(QZz^7rm34QK<#Eo6F>cDV@stga7edy2vu z2bkE4+}dD3H+B|!=SyMS`5{j~QRT$77WDk?5qA;b^052CH*Ed;)$tljf=)vokzR3uxLWUjP~`&Zt?t5cS!KLx{_3tn1kQ2sV)%#1;EOTj@qeOw5=2&I6&NTwl?^R9tfwF-efi}$RSPyq$@DS`&vN-1#>$EiL&iyHg^e6|!OH&VOy zTuyk%7O+)NLWgAw2TKx-9YR&N=W!ZfF!y4Tq1CX-a{#f1EPy1vXt&30rkC=^97YZI zOMe}}xH_YU4mPSi{4rUc|D8%N^2Orzi9FLw0&GTW3&u<(K@6I6T#TD}cm0AWdD{@;9^|xKkvFEAXKGE_ak+ART)UCG) zD2CbO@a=fIw9aA~$1!tVQeQj7$Ro9Dk0lmDsfA9iij^kd{o1va-{eY)x-dJn)>b8M z)Uh*1K09C5-(7~g(9m|NM@){w*j~Z=ZAeh!5g}qR-i}!8hk@r@a^QVqrSVPW#ItP; z+1AdN{kosS3YO?{0nV|HPWs1D|1ou=`3_Ta2M>{Ub-7WI3k-fV9WJ+z>A+<9h;86K z@gkF$8b-v`pB-VW5`nvl9mV_vDUf@Arl6o;K-fX2=*1wo55`c39E0#j5+nC3Z8eR> zFUxa^jC+&)c~fpP2*fO=9NbFg4BSyvZzcI30DDt)&Nu{mLJNa$#}<5rcp!*U#D!sd z#fJ#E>32Cfg8U;(?4z7tE!E?DW=)mwftpyTJW7`_|9!yc@SFv<$tv9`hH8{LdZ@ z@lty}y*+y6-(^tKJm;@~31%X|l9M6ANlV{MY?SgS+ofzSuoyRwXzltGPx1~uAx`VA z)eFEo#I%V3xbpvx-~Qu2>z(U`R9FTocQD_PK6p;9L_3+w-@*6Uo~>8k4H%cKsWu8x z59r^?y%HxNCi1)}qlAYNV$26#~?6vhZ zwc6wS%NJHrroY*H(KhMV6*hb&p6H2U8GW}5&Fpz zB4%A>p6w0eNXVzpp>m^#M18$TgV5r<9||*e^+oxSUmpPQR8t49T=y}(biG7w{JdDE z(`=PA3)_wg-3$qN+E}!O5hMFF*6~vQ>=Hr}n8?_W#1}PWZ2irz7jeA>)7=Ok8Ln&vN9@yR&a&0>g$9)6}0uMvz+ew{Vv>s z|CXSDKm4GB`)XmqwAxniakZLbdWUT%$s7|W1?yh)V9;hjm&!8s4Yjq#kzx|BT9tSI zeiIy7b&68uvlpgdg-er@Qd$)ky64e&X(NOFKr(DhYZO03bkxV2c1i$>L?Z(a4sIa2h}uYn_P7g)Qy4f-5~Ta&!WU(P%OTu-MgREu}M}J32cFV z`pUW7mTj}2X?k1gzFM5f*~`Bh5`cAzN=#e^J_$;IZ1hk^e+l(0>&w_Fo^M~rwM{Q>zh!Hvh^mQZ zjEitF4f+&u{~Za{kfw%~h^wqf@Ijc**qpBp0QykY27Q@QN7te-_;!kR8S$2`8!cIN z&FH(B8?_h;h~saf9>4J==N^Q&l;N0wK_9F^nIkvJOK?UskP*~Fqwr^qHW5Jy*x36j zY<`5k^@)wBDVr%#q`l-BtHe>)S;5z5*|wrO2~ON)v;#6bkdv=}A|}*}yO~_2QeCvv zv2imZUtIZ6&6+M3a>p3-SSylVko1;SGK`vMe$D{sVh*`Is>Exm2n_!Tte5y&r2$l1QdQ zT#O)IiNudxO?5O`Vs*67gZWuQ2>0fJ!;)3Fd_|3`2KS`C74(hnFo6dRI4{drgpkNx zhqb{QEgIyV?gRRI{nT>{f1($x6X0R=W#8@-Ed zu9Qs;N}~pIicB`$QZvk%x5#wxT!uavFgB&JW(x0UD8h z;wX9%zrc8G8IkS=78(7r3=THN3uek+*T&=R-u-i4TA$I_;NLn3Zb&#;3dHTcVg-`N1sOdow z>U$Uo)K?@O6F3J1x%D_<{;ITnSzq!)D~{Fw?ez7?5|u%y6gKdk3+AtnWv_XA1NE%r z7z6eV1&GmlcTql>mMdb$L?E3-u*J7oLsT(fh}IEjvsR)h&~i78>6m0sjW#%Q2bXHL zUa7`4NKi%|57Dr}I7~R$@CXX&TC1xkePE3_F% z3QDIe6I_!NA8JRkkN8XIU~%&IO9kPIoVe2*-6N;0uOH(cDH9?Wp#oM_B#yHL=U~1V ze~|=eZ5%_Ek@VQv6-{^Lfix7Dljy{ing7s`tuOH)tha@L9qGZ1DlDn_@&z!NDPMjg z_u!H0qj%7!=hrv~`v>&(fmV-UCJP!#h4twwq%eQ$;u^AkH_pQ$ah9cz5hX3$MNT7G zmqDQ4NaI~_1=A85eNS6l@EI|Tdn*6Jxc2+FIeog%RX)Cu?l>*3({d`0lKPHy!`=97 ztjp76ubZ>dg+$KsKez+@q0}&i+|;=xHtI({kPQQ;OQqp^=XG^9lW&+lJ*1 zZ%W9ch;T60A^EB)jVAsw63y=tqmljj97gmI0@}gp)Czz9s^?{ag|i4AA-%t7i1+^j zbdLyl$h}3aW#X)szfU+gWMi51KzjLW?Yll&RmCNH_5)b)3KyfSvx>~Mu!Tch82X@m z=>+CVYA*O%r>5mb^g?7t@&vD#XF~PqZ2@}f10djxqe*lF#l@%=r1;80QFDeQD*6v5 z-kJ_+aOUnKeEouIiVlA@YSig^s!`##fP1UX{nCR(k=W|7v+!Kcq_)p zMEPFq@}aZ3WkbP3712f1^2z;J(U1dKKf%^4ln^`Od%%D*iPEA@dBZrg>c%TDW@pMS z<&jF45b0z?-~Web|8EoM(X!I!mS$qx@vbX4IN0BVq-`9yFZ*Eu6{!F^Fs_#HTOk+w zsIDS8nnALDIa#~xH-LL*G_|%g65s8UdG`S2%H&TCwJ{!zTcetZ;%wr_zM`I!lE`I4 zB?^HX2`aVs`L;SE{2(eOFm-PmJ8^VaSjt&xm+mn^LMAy@wxj9W_!np|>%N#vIn!xq zbBTkNZ+2_k{b{K`1X$+E9A;PhRTA$TM;3yt@D? zV0s01h_-c#lIFXSAX3+2lzHZ`P}b@eEe_w!=-wZGmOvOu1(%VwBIWv~d%sgzTG<=h|kZ z(%x?DXrmyHi;D55sk1&0tGe1*JL;{Ny+ADDh>Vwu+tL)xBvbSwjbaECf3bZMXAF6W zupJ*`{6{*c=zZ0O^p44U(qyCiiJDW&xuFYQ+Xgeuw3rlR~ZqjACwcye`B*8GG@Mm>s%U>K~6>N6i-fl zJ&YBUzv2~_pg6iZzR$mX@MUsm<$4$T{Dm(u5plW5>S*qWxU9%j?nvIm-nk{%T%0U^ z77de{*ghxhk_3Oj&((5Npctkixu_n)fr(7(T?(A5#U$Z;Lr#5Oe9fI&Aeb$2KA2#h z`)BRva#f(Y2QF>NV0Pq33~BNtmBUaMJFIZK8!K{3;T-44y@CawR;R`X0D--HuNZ~~ z7;d~9x^ySUv1$X&{rIZ2E$t{zi4QGHoD){xh#icNyrytH^f@1a zSj2fUcVNl8Kr^50|e%rxh za$luQVade&{~QGW90vMEOcM$3kM<8T6y}Mo`>-3*h2Jp`!?kk711%(=@^wY&N9xhx4u-q*qe7WD^l2?siY@!K%)dpJ0CLd#;Dfl>T?mm z8w`q0DaxU-F{4qWL0=z4lxm|{A*TUpP^HLajT?}bQ3zWbMmM<1)*9WrRdFb3bn3|y z2NB8nX~nB!xAZHpigc=csy&v>%+up8x@R91ct!?R$&+1=jE#;8qjF%BxO?rqPkY!d zc>kun#XY;Ot~QJ`lKI8N`zJ&8lIpfo7tlHvOjpJ>QI&5q+6fwxTE%|jC01S;Hb=YGncwQxZ!JhVp7hXQHmT7F;>V zza-y^Ry1PV@SJxS>L~$iSt0*pr!aK)X#PX&nOPcEy;?SlyLDu;!gubjAR60FzoK_6 z$dz=WpGhe4EcFYh;J^vDo9G2AIbFD&JJrX=ky~bvIgAkQL=ja1#Sg1vj8fQw$$o+n z&KtUCx3jq8=-Ij|;tZDLZ{SuPr);LHj-wfgd^M2t7dW5b%C`Ex|2p40miTq}ALUI~ zg*#g>J0e51(k)#qnR=Zvn?I)PE%hn*i(V9K4%(wUq+QDofGhdVIOym17uw@K;|+r5 zHjtn5DLzOq-J`j}WhxGuHy#P~FmqY1LNhY2%uSIc3+U|@I>oKBlUUnI0)kTxhG^{VrEvOGcnrbhXA?(*@F%T-;YdN=dJ`81+PN5S7@;SireK${ z=`fhTn2UCwYyYqW{}1RD;V)}3-yK!qPKN=P$!$)Za(VtvkT!Z_uFRo{$QJ@E&m|oU z5E*DyMezI;*5JaQ3=o=jo3&q+mnWXC;(tyxGxxUp5Bui#O-%RNHTphGRcQ}^M_xR) z!UH}4-KMm_ixH*}hpD42pX(k~$ChNq&d5Aw9)$bX8hAq*nHCs-RlRsRrXC{3oLEk* z!e-{^#U40dw|#lfRx!%O`KdLh2lLs}=))D0T)quU@*Mlvww%$A_P zr5=RkKzCEi^q8GNcFR;FsrH#D^X;z$V#uVNeZQ?c{TZ-zk8G2>{5S?zjlfFy z7)|?>m98OL`p^4w@=a9H4Cp#_qZq_-1ydVAnAq`YiWla@M))Zh$6BYHXW(rKr0+4{8UE@lsSqf2RJFm4e>FJjo{;JGfCXuQc2%_q{1zN!HGuAxDt{Mfsu+Spb)2!f(m-MJ6!^q3@qSA}ap=b zNx>3~W|~@Ck!if-o^gjTcc#m=&>W`+YWX6jDWrUrad8sSv0S`wv#Xml%A9i@URxdX zbg}>Bx#KfNe>aok$KzLR#aoGn^~U6?ST5e)-)dAoA$)$8+dqr#Xoi`HDa82jR6P`?uSgBmZId zNZQ?5Rem)hCoSqKsvnUsvXsoZoP*k-Q=xRBmXN@<(gwF^*3c$tJe)h5zk3)q-N5mh z5tFXfmnqi3UB1gcRGsT=KxS0mIuPpxEi>n5f;l_~olnNc5}mOwv)+PjH$tX~Txs$* z@7qNN<$Iq|uVjkKJ^(^pln3OlM-3&%^%bqHiC2j%|KKyCjg|81>7Z!hs(;46y`nu> z)ejeM;m5iW4<8nR+XHCxd_}KdjcQEw7m<`Xu$d%8wjySrLX;BLO3o&E*B6%pB;9jE zs>A07f1Xsuds?N8DsE&#PDwbQpZYh*GnO^I{X7C?I0_HYpJkmr%MaOLg9JR+SL+W= ziFwKzajT^xYKgkS&`SNfX~nl$OR7UB$4BxriaIOAz&=6R=#3J%ugsC+VO=_2cmSW|T{3 zy3IxLm*}~?9%hr@ha9s2^Q4AlZ@yF2`YEFWIxZ-Zg7w=0wj;I4k^40*I4grd>hl92 zCT`jIdeh|rkTu{UBvf5(sxs0425b3a#gm~ifxx0|w;D;|VF7Z}d5{e}IA;!Ppj+lh zRaGq)0y6?ti-u$^>KhocQx~K$+vm?Rh5-5}&xcrgpE~DTrx09t<58O`-O3y9E<|tUhK9;!Ibl7&RN` zGItY9VA(dGE>T1qKe1QbHfSavNtYR3sY=q|`ePOEL6$8>o#N$s%Z6bV!j^&lXaq1J zL)nS1uxyZ!v_$M1iPaB_uJV{gYi^xyuaX*8LTBHDB@?|ecHFAVNPz}6B+;HH&rJP! z(_$Y0FSZhDbSEhXV~)9V&Gax6z9k9ne7m@iJO}RB5$_5*f*t_V#~@|VpX>E8+m@*O z0Hkh=!y2+SlR-;{EQ4&DYTU-J7+zR*3->Oq((CfB5PnB_0cXNl z)uOt&SKeZ{-pZyYA4;8Uj{3p`&Jk*w>Ft6GbfZ=gC~M#O&xYb=@2O4pskDw1C9CGe z)F=24U$T5JHkAKb^PFP8G@gOVQmwxvJMwGfqd+W#BH4&6(YHlCDVpOX%^NfjVL<^U zX8m-(oDd>Z^0rZVnJaet^S}wtZ=@TzVC`6iHdfBPw#YA!G^rl8nk+oZSqvs?4W2sL z_Iz@8ixbY(i2n*3vyk38#jIpn^%CfUX!E7j)0pOwR=|EtNwqPr^;fWxK32%3QIo?o z30Wole#;9M)tlQrK{l%5Y}ZhR{FM%f5~NkYE*HpLZIapPla8Pg!P6z1;zcoK7ugLe z+@js?%;$gO$5nn(NoiOTTm!>9X{VXHj%O6fz|VN6uI2x9DFk@hz$LTV8orXsH}UNk zXoPDb>7z0x9-sPY3c5VZ7&r`X_hG3SF`LR8D@wv})oS7%1%9hp39)jmkUt8K(XhYoy|)kPTvT`Jk?!7y-a2 zWO+<-cARQv@s}dq*7>zl-C`0?k@Qs|&my+T6N za~R97Zusj9OZ=)aaa;$+H&*g-8$LpBpFd;G6`ksAriC>&L`-shc$On98&4MLaP}k? zZ;~uTfNe_i$R{gmZh%Uxx4et?uO6|cq{jFu3f^8bCCNF46=|Aj8?C}$;4|j-#Y_QU zqHJ;5MeUgcSZqE{3&=NAfw*}vIkwub?v4JU`|(-Be3c7ZvpxhhdZNhua~XRkSKaHJ z{fWv-v$!=~3Et)%j`1B})Lq;EN=(6uo~`0Xj3;4~I@W;ZxC@wunZau}@S6&Kh`7Hp z?-l?(aN=x+NGDPq=@N_=Sz!q&5`_nhHeR;B7A%K=R>X3h!w;82)dFj0o_$GRyczQK z@m0spAA#FPu)@9tV#WZdV{C(>OF{MLm2;qkS1W9beP$#H_`aUc2|as{Cv%)10C;`G zj+|~}rPa}I1pz#Nm@Zm3liG*rRENJp`R7G%OpP*Tf^I?j*@4ovVFce`1WvP~4TSjb z9I(Ha#Bc1)iv;V5gs~78v@Ur*E+-`XjjcFb+V*K>#CHNrcy|~*Y{Nx@)EnG!Vdrg4 zog@+0P!a4}(f8KDzuDKRdsn+raak{Z5OvyyGRzT;OYL{+WC(x72k_8zC;u}f88`K) zhsebyZG30GD^1Kd*wKL-ikB;JkKi{5F*8wbVvk)8h_E=7jJUDuX= z|B*e-Ake`V({66A`n236B=7$kV@T*A4+OX;Mspu3T+>Ml&zncN7q{zHwpA<~FTIgm za@&`o@^yUU4bI5&6>hnbM^ZR1k{SGzk<1orDz_%L*D|NgTzyPGb0)0DCz=$yk*fGC zOCHv>!ErZ?O_s2nOe5rg-@s*9(A-WE?jl z=Sl{K7mjmxn600ss@M*zHR*`B`+EirfQOttO2YmTRDH(JY)CJY(8f7QX=>s((8t=R z|0)$SE{a>_A0fh`u7W(ycJbe+EyRwfEZR+Gnl3*1GS#UVwR-k6FXR0(s*~Y8Xx%5u*t&DbNd$ zoqmBrI$mni{Xm?5J$00s%#PD@C`iE*<_#&5rb+mx4~S#q)sGQ#u%=?cKIhhyevE{c zm(UDaccS!KHBeM9KmOf+27A7(69Y&KkLs4eI;RrA>*m6fQ zP%#C8MF&M+H;Dak9i-lduAQ1P_Ca~;mB?v$l2!YEHw6?eb3e^Yu4|>?jNVjF`@~yi z2c10luGjqdkOHavh9rziQAux1T)eE(yW*BbwV|Ol`;q+%8JG3?aH+1oG%h@OBz=Wv z4Cbb^SK&?rbaXneK+XPhzqXXYs~UPmEKYM>*L=9Qsqi(sx|%g7RA&@eml$_2$m4M) znx_D;G}-c61nU;4(=3m;H_F%%FnL>O2a)A+CHz3OXyiZ`@_b5n++5Dn6PZcpU1v+1 zr8R*`0}b^Vyx16!(h;U?|M+S2=d7^v#Y_V|lg2n#5!MDce1^fF%$^?y#pOj%C9xnJ z@h32F9G5s{B|~BA#Lo1puI0~cau>i%lr}OcUlqEE%+Zs#(gIkF+Yeh%Y zkYk8yH%^~cw9f6p5HC5aeqoPd`#m0GkUY@xl%;7|8XEpYYLn z{bTPHP?L6Na>eY@BtQqd3+HQ;oxM1AH3J-DFO{Y$tGw?7JagWdSm5m2m~QiU=|h>g zsFmB95Tr#0lHxgiK0X($PHH4iCn)pzdi~Sugj5UWA%+1tnUu0p$HvsZoi?B4P<)ma z7_Favk@5v5(a7ES_2f8@y`;bnMkwN9n$hz4>$Nuy7CfGj>)eRG70%y90(RkN$o! zyzsa9!0jLuk(Zucy<`rW>t@KJdSM`3_;`bWM{bi(DHrQfpLeOE;=)TiX6az}T8|I* zJLw72Bj#b6gMnwI($Y@GeIOK6^n)8Y$6w*^Oz&+%Dw_U6tY-m_DnGvh7lo9VPKk;m z=vAIusVcWxYYEqh(#v$-2FkxSHS>>9MDF_pEhW|bA;5-hrX>vOO=K+8N?@a9<}?z?&+kKZps?8IK5BpNqn>dOXi@?ugu-_h zgLGq@c4v#>#%i=hjZR9hZPE_b55}+$epy4uW^5-9Y-%4&Br?RxX!V4YeYsN{N7d&2 z$)*%>%7o)|U@#fPNhTc|9qwXC9kwv}^aEqI#Qp_DhuV`n-PPw)(u;mr`CcgA7-#nY zWX>d%ci&+5b8~YGvWgXX4`^UPMj~pOgp{MzQL%=t^ow&;meQf`@8b9lpAQyjGViGJGd+4a`S51?t_7O z_XT7sFm)qm<5D^ZMVoP*0|U)|Y}(;${pa>y&w|AH8O@!|R}=qS#<5EOWdm^62Sb1Z z{<|MQGRF0v`4s=T$kDv}KZaB}`uM8)=KUdLcS-XWd*+)#v19B#VD4Eb`we4F=sjQ$ z`iCdy;0_@qwddCtnV-yl-3g9y`VG>-{6}eLs1Wx%p?koWKJ&8!*;BTv8LxZ5nCx^@0#&MHn~_QA zFTZ;LxHPqI^&arS>r}_)9^iLY@XvIwHj+6w*{pfV(;wz_7`{8)m3t;qeMx8pndyUE zJ*n2L=ARsaIL!rjo-Dq+2f(?Je*buiV}77GV`M^p`oBLzaq}LK4S{Jd$fWZxFdp*% zA^+D}BTc`(|Jt?Cwz;5%4NvYiCx;|fUqmR6!;-H-S`{*{6*kg@tf{H67JSZNwDs;h zn_^bNJ{yfmtUr4$F8!=C?WwC2m2dobCPrBkqOuL^H(Gfjlf{1$`~Sq-kfVA=!%eESIV&Rtn|ttZ_9V)JWv$TQR2&Y_=FVL<-U+4`~3ZdfeeKGB3uQN zgv2Ij67lImO};HMYeGcXr(o z+xwDy1?(5a*s(pl8>lMVkCr+`Z{4217Vrq?F3b-TI&~ukg8&)Vl5+*%hEMbx0jA+c z1zt(Mv6+OwY*yiR_W)h?>yl(7#ajpk(b!e(Sy?%4&YAVDx+{CFehfziQ?*a2kzwSE zG~e@;j>9abx3@yzNz-|Shd{x{tSmqR{fW6YBh5|rM+52XLoaVUUDS|7P0A9j5f=TG z_?>+jJA2HAH-T43#rC;@*j|f$x>tyki@@3l5dVav*f0V1jDR7|%?oFN*$DzJfU<2J z*VxKN=db=IP<}fjtVAeZG1UDol&$$z5{PVbD>X7^9(3`rr=N(!r7-t+Ew}$g;!1Dd zTqWviXZn0s*m$-wDdej{=ctvq_<@o*)u9}X{*3oSiIE% zhS8B#5|b3U`;y)R97k8ZC#`IqaH05XYAVmZd(KK&b@mW@78B)rgn|WF{$!F)+j)?74lUC^GLf%a)CC zJ!OOuGIyCCCPk*gzAzn^?D2{f!o4_aw47hGQTU9bgsWbW4r@nP&!890OlF{j>}HDi zY_kP1f^ZuRPkXRU}mgdL&7_auZVT?1Vc=oU_O$8!qN!;^eIHNi8 zV*vk_dJZ5K2IOnqquM_jG>tsoc}cfBJ9zVZ7eIL^|X?%JUT} zJ@ZY#uPNlc?+S670q9izV~O3ItX2VwPFLD}C68JXYS?7jm<=AgAjX@jP>cbroo3N$ z$84&GyND6IL#jP%i`QdY+u#nvWAS(yAHrSpU~pXPj63>eQ$w;S7^k!NfrH(2~p~;C#%HyZH6u)2Fdb@V;p z^toA%-qVS$8=i*`E@(;yQnN!ao9+QLRx>)|KE@S(IdAKs@}A`x=cy(viCg$pJvG5< zFw^a2t6C&eyo<)2JRPRaYYy#HyFm*MtgMMMCt<%T{}F8?X5?Q~{=nf3wIJCM#g9UNaH{eT-RxJTYuLH%PnkXoUEq`WwUuEB6Yh zBnANNO;z>sQ`R#gYU-*$j{4%&!c8z9b|^LtU_3u&b@jL%J+?Z`@BN8q|5Q!S)K*;El&Cs3>T&v#xyR4q-Pt=7sa@ zVryc#CXSGP`%9+sm#w)PVR{ef!9}|xso|Xh)#o{G{6EIhH*WXt0av@th>3M|bCag5 zMA#W42Upq4FvD{(|FyHql%uc^=}7#^Gzlfj@24>YaY#kTr?ImswJimx12N zZ|2YYj7Us=*%+|ef(dTezY5^}s7v*=Ms*O_TyHX_cprEB5$$P}%-sX(Cg<&t$$Urd z(hd7JUM5!;Zib)LLmqRFXcyE|kUd!n?9)&sSvgP)d8tdvcpeJ#aOLNP!T5d_mGqJq zZ+!n|3?AO{<6O)Pgv8W41=$wEh0PY~6`UAnT?AtHxj8Tm9_JD1gPQnEVKe(Fg zdH#@_P)?+x8NisBAwogNx-%rJP9zsgu$M91sYmC{ zqJv>^y8t)m+$0IT%iDE}+rui18z(-^Xa4cay}nBh+A20Br(k1A{7FPUBywR|54LQSZd`jb5W5tv&532*Myj?tXyem)EgbW zyRAzS{y;G6%@!s}^WJ%X^4xgplfsYMLQQsG1)g`IIP-r*BMNq*+3k6z!(fU`nk>W! zsPmiA3QwU;ti`X0e*504i<~>Mkr0Z3M2|3FtCRl*!<*3ch8GOI+j0#d@=6~x*+Q|9kq}BD>j$uD!D4I|@qb!~9Hp(=yu{4ZEfbSPYPZGS# zaXhesr|xdY^T|W_%#`!#S#3wNvCEg2>6Z4_4`t@{F9SWEaE@%n>(Zil=~7JEcEu4* zt(nHu^Y4p5wU0H0_)QqLp*1Qrm9Tafj@URvCMENWVtF*uy zI$8|vT0_2Y>iE&UpY<4~%hZnxh=TJ-sg(LB1suZigTy0?!XQbu`hT7r+EGi-;*1qL zId1kpGhiSc_fg~g;JLWiO}Ne>G5mZcagb>o&q-U!$Bnmd`grW=DDkqGs5?k1Hmryq$JWJ&00-lumxJ*2s)JKua4z*Mhn+idhmGs?*=0Z4R+dK$mTfmMz~(5i!axOb_8=fR%Z=P5rQ;e})D z5P_;iCLyAiBIX}E7#kZ?mk2VpfuGo$#p5Z+1np%t#Q(y$Wl6^@tG3TQFJgL48aup5 z9o{$r`4hgbJi^h`aKLKLkKq9PBbB!Xrjw^{6j5&0jmv*2>Wn&KBt-K}>?4`gb^AJ) zrun|Ac;W0Cbs6VyiQF9)dyPFLTwyOn#>gEYEq%WUXfM_?2`}GLGctVddI~xl3|2(M zNr|KT{NwKdnn@B&-`u+nv8YcsOArP~w_ug(ovVI0O}^ng=tmy#T4-pEJ1dNikhm~2 zTLIv!Wi&sp?!_a>rQ*euYR5RRq9+9T#Z_=rwi_iQ)Q%LPH@;SQo~DbeqMQ}FDdU8= zH9bHHNz&!8!A}RLhAW+E{UH6yY%5rQCbOumzBrpu2|M*6I!u~4X04=s=CIAFbn9I5 zgKtCPEAnh>xvGhEZ|#NT-7kU$!Te>8!sd=w`X!PkVu_qg5H;LjR&_mkgbx%gy#UR-L zcB}IP<8$f9CHDZk_K{pI&+d+MkL>JIjOHL07azB}1J)#=kqcVTVqw!s&dZxX)Hj~T6f8O-q5gpzc12d9M@itzx$VaZt@Jm19S~W z-veUVkUj(>{?4b@V&+Rj73mvP*btjPh=6lZ`n2c+vJbN${1l9#_|l2>d{GOE>IPxA)u`D1Q-gi< zHPb6!Ff|o&C({ND|Qpd&_1_~OH+W9yTCsZK?E3$`k zta>C>iZs(pj=r4&=j)3nM*S31!!22g3|8-7ulouD3&23YXnVbK3-jG0>8Sat%zH3+t3h1gqikhnR3+$p{ zm0|=gSn@=SC{CMd~ynE8dzsl9v^TDA%88Quc2F5S$*1=oC zuO?|*!PsBkLf)^r3w}{l0zt#Xs~Il5bf!VyO*y^z7|_2C^lnGi*pr0AAE2>{JHd&A z>NXA@kybY`<@L`t##5nsEX_>b6i973y5;lDPqJHD56B+wlY2lMm|hZT*ccvStcZ9) zD-pj!X3f~JM|!EBq98ie-rAR}Nvj$}G<5Kn5vNo@>RaLxtYUSi?JLMZ zdijzm+Q^g#IoB|Z;-uA3-cclBIvsTw$9rMD(#sh3>~*?a%04x4_$RA{io{`*Y?1sg zbz1F7cx$?k|GDEcUAN@lgudmGX0KRkZ+6S`!^BOo{S$hRmgY%_RJ@ax)FhP~YpV1T z=$`JIQpEM2S0>Nz+%aCWHf#4G2ZbQRdvasD#^kZzV~nEpuz<;7H~_u6$-bBCPoQjD-&yME%%f_N&hN*X!b~{hkzXHkA)x@ zOHD2WJz~%)#6ayT!K41${oav{wz@YHZ0_Z9EV=ZzPOX(N<36bVM;<{($O|_1Z%=6| zH8#R_|GqlQB?c`k=e+7XW=OWR_4FPYq0F8wZ!I6LsbD(@wVuJ=e6KkTza9=H>9?jY zJoM$|{n}NV%qGvKxM0tMcTyGJ+kZt|$)iz6jki7yYB|M)>4e^dkTkSbNB{MEv}$qJzBSqw0I!^-0k={zFzG zKR@0Gz=`0F%2t7FSk>=}Wpm^023<4L;f?v8)~xg7_osB@cr3*WW{?ZjO`W19F6F|h;#+ty@5;Qd6@=jeuZkXQBaT5` zsoO)PZ~<)4i1t(-g@C-gLESDtcH9YdfE8OEkpKghKYPDgChX=MsIld#fL*u&%(TZCv81T&X9qJFHC&%bp z23;f@Pm8Fv#&NymcH*u%Q8-UY92(?P?&=xjjl#vz8@5_j z^Q5mIXKQ$&5k8vaTWH>$x#dN;9}(48=5Cgo!o-{uO?Staj8qPiacGe74#DPd4=}OK zHX(NbR~-Pcnpaqzr^j~?-1a?zd(iqL7!h2#Z!HAhx(?B|m-jD!VEd>rk*wPE2IUQb zJ_0fV(-|cjByp=+VgTz^2f@bWb1D{49!aks#V=Ze`V+Mn9_eW(c_2kbs)w{D&wsE(+dk zle{;BEu>mG$fZ2AZV>?jh31r*I#JdS=gvAzr>@sWlB)5H&SF-agawR&c1!;Q5kMY( zXR`zxX8wA{O_Y~%iF@Aogkg1f<7Cc;u`kwtvq|=@s;(Y*%n(T)M+cqGOycDFKBJ7! zQpzHN5gAb8WX7Gew5(T~@OCo)i<*dxf2vxzB>za4IcRLs74cd2sXtWxrAC5QWHrE{ z?f7Yd%dhYS(%n;v#rcI-K95J;L?B*o78%3{OO7dYH7E8@=SQwf&Gdt|-_+qTg?Ypa zNQQXH(bfA)UTYuSmAVy1NgHT28|Oaz9nak9K~g&KO6K-hgT}SGREl!5SJLcj9%{_* zx~!37hg9tE-)+-&AtUSXXHYPuc%s_N<}NJ>VDh*YE=7EXA4%Mqi=wgzSQ^sCNtf~Q z#|X+=g4_Y^vRD!mtPmvilJ}4S3+n*S|K0N@JeC~LVvONtBwLh161&#WmM0a%D{LR~ ztN);w|3)*pvyVe_AQjv0;8c&N6r(-73-5a_-6K+^GLyT0=`9-cffQr5ppmEU57{Uc zPG*8>`J?`qsxzk$m>cBWXV0rHWSv^yygZZiQVZvSkHFh92=p0+gA{WJlimy^71eP+ zX9>rr9;)PhDnD(DI-NrVvCt?Kw#V-OF0eW5+EUqY_cGSKSnHjh(VbTdzVjxwPSi*1 zqq}~^M)n&u9BX_IC^0dgU*`E@{1A!Jam_Miamz&ji*3kxHpe8~Yvsb32`Zu=L!`U_ zDE}zV%eb_Fv5mZ(@;=JWm3pzVEi;S_jZf(e$?D})(d#(bOw6>#D{^LxW}qf6^?8K# zLe7P4_#QCGZLT`)>{H6%^a!Lp(IDuMCE$@Aiw6T(f}x>BUX}BU>Uj^w_yrRr`e2Md zlo*8)AYVzWbGE&DT3nW(KAni(joT8v>{b46^b-vW3?%dKma=j=e} zmP~#*$RFAJcg6$VTZ$p|5x=hQ_&_GC?mZNc=U42AHfv`<6rouUvm6`#LiD37X>9|A zYS-o&*X5n^9nq>~!)`8+l8|(L3HeJSpv#z7yjQFjD6KO{}+>U=)( z^FCR$6*igCm>htyS{2Zmtj$lAEEmB6F|m(1s61>_FW5>;?xruaSvC#$Nnyy3-*z@U z#*i{M!&&6-Y_5~JsI`#Xjqb%V4ZtR$Vo59-Z>d29L!zys=(jR7!KB75itS^ zlDjY2DDixg*L_kUKHkBhweRZ0D~TP!n^(Z~=Ti(MwT0h@#v!Wt9^UdO! z-Udv3agG%g>H{v?cFetzLMMF)G^8UBsQ<`GI;ESS*&UzD&_Y0B*WlFjrIw3g&~6sr zpjyPRU@!RbzzL)X-0((9o$EJITK=N~OruDKI<1!KHUw`-dyv zT)KO|(om$zFa$|_7kn#1tI-uP&};qNq3bRGBSJzyl=O@#K;~zJ1Z~iiMVVtcJ+HtufWR(JOQ$OqUR3&o>G)|#-q9lzWm@a1FSN`^G){gah!@p zO;G!BJ_HOh-Sa(9+8*(=@t6O0vhJ$Akd6CjP&X!?pxx2nm^i|(tYQJwlhhm7utQZq z_(k{Nd~glq0XERSxI7~z=7H4pw+uIjUL=bTnq|)x%NLq4Qtqa?QO+B;DzC=qbZvw& z9cRXda9}ci)G|@{waX6O8>Rd7tw1pHBubN-4w7T$R8bYw?b{Ncv$EyAuR3I`;&j6O zNUyH|ctEv1)9x(-(0(B23|K4g0mI5iW4E3y4@3kvtV*-bH?Z_rBd!9B26w0_|287E zQ~qgU;W-e-e!LO;B9ycv1@#D~I~MioZ(>?z#QbEDYM=;ih90SDvWTz2o*J=qan>X2!3|5nFKPxZb*+zx8mAc~J zJZN8inUO=ZbpqCju10{FpkETTiY?tK?X@QkGF9U zeJN6d11_RYTaLsk13X5r429i+^bn;(KAnTe1}w$R(h(jWWRW(eG(?zC2HS|c3Da(7{%F>c?a|3dGCk&z21i5 zQ7&=#e;6_1XZon9v|~k1^yJAFJ)`T0^4luez<{p(K~UMMsh#YT#jPvQtl9R7=!tUU zizBMn`+LnhgOe>{NPd#OH&bt9x8&8XCDPpQ0Y<+m2ZSNnS;sMMaUvs&ns4ikX(2L( z{P~%=KDr36OiCudTzprzvk?2fv=)t&z=ueY9c|wv@X2&>;B~1hU{LoR`_#=V`uO&u z+cIKUabMb#MWx5@rR;Npr0^04^wkzg+`^N=W&Vv-h~JF&OrZ|X!PgD{46$Yir>{dYP2{+8&xkP zy0P8G%l(#s&};LKnvZ>AlB|eNr0MLW@Z=7OE>D0q^5&?6`e-1p$NpJp7>s=ID?g#Y0m=PKEX%%d-T?MvjDo9@A0Mr%8wJ`oI5G@Q7 zrEu?gf)P_YwG3|X@-=A1PmQd0kj^G%e>d#tcZFD33k)&+38p-`Bm6dw=yP3NlEAAQ z4onq=#t*@M_z2A9w#-`U?QYjssTp=e`rQYKP-;rQ7s!w8$6bcb~9xUAZ(oVtofepS6#1COVFB z+MXDz*cXFGf!U6f%Yr)HB;wp~PswYf=qTkXBmS&tR$WM^s(+PL)T|&5HqJFkl+*9i#&$1&AbLp1Pj_}wfm!9XZ1&+8dS zZH)p8o)pJ0&?1wV>nTh|9ltE!2{|P$(5sdx|BAt^F ztz+!%-^->8!_CL3?{p(yZHaH67OPZjCtp;kwlzu>|^$K#bBVRP>1-O!rFqzy68-l9Rg1pw^u(YG-r&B+PL)E*S7e4S~udiR} zPyNoYE-mLhOF>NLegCv-@HnJSJeP3P&E&*6_v6D)u4>*8B&Y&>GkjM0$w4X=hK{x1=>!&7A)}pNlv29Vqd!uL7OsP~X)Z3cJ44;C0 zLM$t{`N?+lS;JKPRSH~7HQlTa^{ODn#~StaY&o|Hlwa%1;*HDrQccVKU+M+0(+kG{ zx=}vHma^~92n^jy8nngZrL%4JFmbjdv$-~-5BY+PttpeO^LD7*yR=PVNiW4$pklp* zA28nU1&w1D7$RvTY}8**O_B>K>*%?ZOD0E5wq2XXhv~*Gwqp*AdnKtGvT-JzSK}exCmScsq?9HRu~o!EI9?1)7a=7G*tbdVOTzF27J(OT zeMXlZf9?T%AskrdAk-P>jl9dsEt|uBu|LsNKXL#lI6yps(8NyORYxL-bTy8J90Cz2 zzY!0NZ1lDjX~Vml@88A$4&4LJ>(*rH# znpN(|z;?Gd1=AUNzq}m0 z##pcjJ(S@N8RL1iI96sQ3g|&Owj+N{ zKd(c&IBMW7!Q#oYLRV%&^(j98M=I|ZEdZ@&PNNe&n&1Kyxj2bN>k z6er>er}Iaa=_P&92JTp{#*Lw8lieOTsMp2;1yJEG|5<^Xet>+bqdrKl?BTR;{KFX= zj}H0RoL4&`6^Q{i+^h6%KWk@P_Yl-57FsaPVhL96^BKYr!O^$Y>gKE7wWg&>^g^R% z_!%i=64)Km9)rI)gUw5~{}`Lh?LaAOc@fQIA$3a>=x^N%RmLpH5*WaNHVdztQfppa zxT}#&7}qAi%zn*F5Req83k!By*@cWW?OUPbpzfi0r475gPa5E``DPfk{U3PeP!2c%3@A@0PgNx*=CxKOy5q z4IE9@sm9QaY#iy$^!Dqt4Rmsjs{;zTRe2H{LQ7aOhC=%p|QJikp;H z4U_lkFr3t*Ebq|xjkPX4cy#jeck#58l1K+@j#A_F9^3%}W6_8HUb76Kk9wW!i3@nh zN#LcMk*>TO?}rJP$$>uIR8?&A{RVMG7cukKucuEA)e*wR%jigM4lZR@ML7#4mLhqt zQvdRq9<7U%Gc`ym)o&;MdcGO?`05!CJa#JjQmkqu@1D1snlue}VlxXfd8)!6dL`X> z;fu!;RRj!r;a7*Ne4j;VGP>%Gr4mV4X-BkhH#Vydb&*mAK@W7KRsF?BVr&R!m1==d znJ30`CEfi|$p@hQMqtlZaYqdnM+$x(?aY?r@81vYXU-_d(Rn(w6ryokY-{Tg+2@P-4RsCg z7!1*hG;+ABDra34lG%wN9!Cpc!B;KUd)f*cZrXo1Oky|b69RWQav6y=wU9o_KKppO z@VaMeE?BRmLOt`;yNDLf2BoFBE0Zma$dgH8pC!8+;rv%i1-Wr4G2NjN!JQ<5BM=J) z{4P0XTZJ34CU+Dg7sbs19&hURPPpp;L(k~nCGH`Q%Rd0 zM~w}KfDpz`Af4au)~oML6TuH@x6!Vw!~D4e10%bgpX*`f@ue27Xv~t+l0bxEvSxE_ zZEd6f>NAx(6`Rl!Hwzr=ZJiyo<_Gqpq4DaaD^Vy=BU9-*iqPf$rj1 zX+s>PzIO}ArGMRRIUnF^!7HVn@U=Wi`pX%zYYhk}QNaX;M1& diff --git a/assets/swe_bench_lite.svg b/assets/swe_bench_lite.svg index c27d3269919..ae6934b48fb 100644 --- a/assets/swe_bench_lite.svg +++ b/assets/swe_bench_lite.svg @@ -6,7 +6,7 @@ - 2024-05-25T12:13:05.168797 + 2024-05-30T09:44:47.592823 image/svg+xml @@ -41,12 +41,12 @@ z - - + @@ -412,7 +412,7 @@ z - + @@ -528,12 +528,178 @@ z - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - + - - @@ -762,7 +877,7 @@ z - + @@ -771,125 +886,10 @@ z - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + @@ -928,7 +928,7 @@ z - + @@ -954,7 +954,7 @@ z - + @@ -1039,16 +1039,16 @@ z +" clip-path="url(#pc190475179)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - - + @@ -1085,11 +1085,11 @@ z +" clip-path="url(#pc190475179)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1131,11 +1131,11 @@ z +" clip-path="url(#pc190475179)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1163,11 +1163,11 @@ z +" clip-path="url(#pc190475179)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1182,11 +1182,11 @@ L 690 161.676713 +" clip-path="url(#pc190475179)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1228,11 +1228,11 @@ z +" clip-path="url(#pc190475179)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1407,7 +1407,7 @@ L 163.368917 273.70025 L 163.368917 186.321891 L 96.917045 186.321891 z -" clip-path="url(#pbc28a2e89c)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#pc190475179)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#pc190475179)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#pc190475179)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#pc190475179)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#pc190475179)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#pc190475179)" style="fill: #17965a; opacity: 0.6"/> +" clip-path="url(#pc190475179)" style="fill: #17965a; opacity: 0.6"/> @@ -1542,8 +1542,50 @@ z + + + + + + + + + + + + + - + - - - - - - - - - - @@ -1743,7 +1775,7 @@ z - + diff --git a/benchmark/swe-bench-lite.txt b/benchmark/swe-bench-lite.txt index ea071b69d58..b73faad2235 100644 --- a/benchmark/swe-bench-lite.txt +++ b/benchmark/swe-bench-lite.txt @@ -1,7 +1,7 @@ 26.3% Aider|GPT-4o|& Opus 25.0% Aider|GPT-4o 25.0% Open|Devin -22.3% AutoCode|Rover 20.3% Amazon Q|Developer|Agent +19.0% AutoCode|Rover 18.0% SWE-|Agent|+ GPT-4 11.7% SWE-|Agent|+ Opus