From fdb5655c890211fcfd944c87933b69fb59d33047 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Thu, 8 Feb 2024 13:49:29 +0800 Subject: [PATCH 1/3] Update chrome examples --- desktop_env/assets/history_empty.sqlite | Bin 0 -> 196608 bytes desktop_env/controllers/setup.py | 127 +++++++++-- desktop_env/evaluators/metrics/slides.py | 50 ++++- .../44ee5668-ecd5-4366-a6ce-c1c9b8d4e938.json | 207 ++++++++++++++++++ .../358aa0a7-6677-453f-ae35-e440f004c31e.json | 2 +- .../a669ef01-ded5-4099-9ea9-25e99b569840.json | 7 +- 6 files changed, 364 insertions(+), 29 deletions(-) create mode 100644 desktop_env/assets/history_empty.sqlite diff --git a/desktop_env/assets/history_empty.sqlite b/desktop_env/assets/history_empty.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..c68c17c183f635ea828344fef0ba4259fafc2988 GIT binary patch literal 196608 zcmeI5349#IndrNx=jb_0mMlxMEo&^vl7)`ZNU~(vam?tjEM(c(mTeTs8(K51(b%II zc@7{Y8A!KuqWEYYREW3d$VSyN4AO|d);eJzz{Is*sAWT`s(`E*VSD|S0CKJC!Cbs)3NxhlyrAQKYs%Xh)Ey;B!C2v01`j~NB{{S0VIF~kN^@u0woD-5PW>;VS$Qu!gjvID1cxW z*72D!0E0_tuxdsC==?t{`sc{^$X}5Z=`NW(dLRKLfCP{L5qM@@*H^#PVk2WkN^@u0!RP}AOR$R z1dsp{Kmter3B2hDtTGDC{Cqe8oA?F&k#O`_!sHNyT0Rg-C6e$GfL-K;N!+D~QF4n(iO(#UK?S-nQ`lg(as1;gxeChr! zjuZEad+Rq9%2d;c8s{H;5oAnNZSSjZER?CDGV5M=>3J$cKK?*`L!rzXka^_si)!bo zO!dg^+v}OkcB5z%Gx_0HgUoIHWbk_Jgk6Zuv9Sp>FOwkN(}Y;>z)fh+uYyH(^}_>HNaX7`)NjZM zsrVG3EAC%Ii?q@DzZz>YGJG~yq6;GW&@sou3;S&q7<^l6BVILIv!YiGbHFavERK`%@pWGrryg~ zOdwPSGIXnOx@Gt|eht-yxgnfq_LjpRd-*Y%8{-SzRrOV>kRhvu8KIX1Dr7#nreTdL zlhag1=FI8asZ9A&pt7M-&2g?+fDux==Eg=4G6=@!e^kFZ7n52nD)ixRx83&~jj8&M zFE>`GGDcmFPMEH~^+}c^+hgA;uP;}n^!Wy<&u(W@53XuhrON2@eZtgtvz^N9vRqZx zP?izm1*cApEeOHq}N<0S!CCWgc-mGRO$Jzt}MivvaL%FIIn3VA3$r9}lWqZ|LkI*2fDP(I&G|;ej+`VnsK&3{$ z{NzSMIcLqZ&elv>n9QG5-$_N#^SJzXsaw9u;Q?sIunH#8C zUQ5kY>zTQZnq_OL>8hb7sb*#sGuJTpN@lKRE)~oyXXYyAUPetPp{Bz{O}mqs4r_>?GUB78p19-{cAMU2-S69mWMF$-xUx z9k4`500|%gB!C2v01`j~NB{{S0VMD?Bw&Ok{ms;9qQ)j_G*Y918f;X+%|-?4+|;3t z8XKrlOAR(6uy!3iT}zD`YE)CBiW+OEQAv%})Tp3FIW^!-f01Xy04_ER;B-+3Cp8?@ zuv0@f1V9}u)L;Vuf{C7H`~M>C|BJc)|NkLRlOMsX|GngIq2GTac|XkZ$B9fXBV**9 zFtguCs))_?e_g+F{nYh@>mk?OuG?I^@Kaz@87484KInVRgpBHgTxPSw6eHJZphX8CGR2u=zrn zYN4AfkgNr^QgCH0uw{ZXYk{p192pDS1HrCZ=w1gl)k60)uxc%$)?&$6*p>ul)k60k zfSvznHNr*&u*HB;wa`5SgscTN2Qb(LGu>c;+yUDF=;r@_O`a#uke`wt!y5pPlkbp+ z$pbJR@O7Bi{|fms`8>G=*8hKk{5km`xsIHH@qrWY#lSgoj7-Cu09TMB^fCP{L5CdzNNs$T;O6Va4{CR2$e>@l{e%K5EwQY`El#A;{!_n z{|6lT1Nkla75N4E8La;Q!5;+>CW-`*01`j~NB{{S0VIF~kN^@u0!RP}yg~wWoBtZN z{eLyv{jZ8`|G$Q9|5qVUUwd}{e+yZp>;Jz1@Aki!EW#H6&c4FLP#g&$0VIF~kN^@u z0!RP}AOR$R1dsp{C`y0`4ZLYZ)2b z;hmB?d;9vm?XoQOxA$!6oNC|N73gj6?Aa>y^!M}!I{RhMOl(&6q!M!6Gn9FWH{prJ zLml+Fzhh){{L+r;h?Ja@j>YBaS0oHuwcpn4;%nh^+qbjx`V)3~+#0;~ikW0`F0swy zSy)(55`q*xHqpkFbJd)iQ@v@*GpVT#>Km8mVjgL3E|HAO(rn2r*7tT#_sX4J-R;v; zo%ItlsWx|4w|kd7;7WK?WgK9ZDprBf>SRg4U z+m&QbB@BREmd)^0a*A9O#hrkBm?3wLwC?Edo$8l516}Rj-l?wkp6<@B_N@VLpnXeM zz`Lbys;Af6TO7I4EgGJeBEySGIhue>b({##6(wFm7IsTNVf~5hdBjzDa-2yR7$>OvqEg-j&1;;F#SaBzqB(&%9OiN%RzG&wn+8i_jKA8{r3~WSiUD?Y=~c(i-S-dwaLR|DN9b8i2x< zqJH{4JWyX)FdL1kq91o|JU&UOrz=N}>Jircd)&;YfE2O zcVFL@{{H@=B{l``P8MRyV57< zAGI5|A2!mXY2a^fRcnMM-Z39eq@+k9nF@wutXhJxK*F;#IKLwjj;0oSd;E}%!YgCt zVV6ryd2zqKb8*-3a8LK}z+mrix3|l?wQpBX-$3s!Z|{JwtEb=BIn*^Y)Y-YUr@L!Q zXZMz#&RtuEd=O$6SiM7F8LB@b2U%4Q$pPhJ^LB3Q>fhGgm0#8LVuMnJrD(@gI4Z@L z+UKOeF)1XMQUJF9pYsD8IZKvYKXaXQt#LjER{S9WB!C2v01`j~NB{{S0VIF~kihFn zU7FfJeR5By91QeJQrFh$o~bQU!S1b7Q`6I3Q(d0r#y`dl zCaX1Fs}e3g>X4X`vN_l}l?q3KJCuAh#S+RWMmzNO+hHuDmxlqIKyzwf`EN{(?O0VIF~kN^@u0!RP}AOR$R1dsp{KmxCwfZ51fd7}jm zB5yP^o593v2BX<%<#~f(GFq*?fiC&5()oX`?6s$gF(3gXfCP{L5<=Lwe@J@`7uP0X9Qaee=m?{bGgPdB!C2v01`j~NB{{S z0VIF~kN^@u0!ZL3N5CZTe3_A&E`gdhgHhnERwwx#SHa)Nkr4TgOLkS*KWT5Z{m6FG z)?~e1d`cX)++*nz9ur)K2lyZHH}cKg54h71srU<2>0 zFjpOH=Q#z^@klr+`=wMe#_WFevlIz`0zR}VC;Z-gm(}T+a!XZvJAYbWU#`ga&$m|$ z9vJpb47(?M1AB(u`M&PWs*67ybdTFI==EOd{{0U5rH z8uU*sCFMk+|8zJSPRz)KF}jC_cli$OnQ(WuU3y@2+;?EoJvKb4%SD@8Nwvk@GP25C zb@`U%S*79;e_%!mM|He%uI1^0GE3E#E&QqGJZ3o=l|#axb6Zt{T~5NixfnK<7eA}f zLN*ZCy=ULvk;A?NgS&ki*lrz>Fnk$Q6A#e%Q;Arr>gxCptkM@EoAa{b+12GylSW%8 z1(`3%Mj#cB!-s7BbMPVI;u%d!@sNBj0r-|lJn2t{XXV1;FQ+=mSW=3V(kqBKStQP& z6b;F7e{yNAh-VnS%NviU=8}H+3UVSAEzHu4l$dc3?AbSvQ+zoZERp}%oE$Bp*liIh zk@QOexMmY2LMG!9e54rA7UlrHpA31FjT95_DAAKPx9^Vny_1YRNAzCTX)#khP3gR00Jk<_SyW{P5x0qG9Ev6e2R_ypW_6i8!s zrA_Z=&7mr)t*T;f4qYQzOv1fcbuHGmD`*NYVVbAn&1GK3iwdiR*AO z;m@cotR{upP*g|pvFp&5mZoGCU-xEBWXy?H@MTtWm8XVZVAY$6hBn*Uv~0!F$_Z(c zP~xS2EtiRfF;Bx%S#8YLVln50v@tVR7JbV;i@B=3hKK7Jo-#9eW|fk&i^U?&%4p*S z$CjwsQnhIlpRQwiq?92MOT`1S_S}+*Q+bZk`e+{kw6|9!jsi_-Ig>J3s%mQZ>xOd) zCX|Ah3lm9{h~DixG_h}VF9aAL-aF9-1u00WP03~P`*4n^8myh9);9pl zsoewn_U#$=?ae+x?(*$9IGjtH-IwEjR^)7nG&&!U0yDCoJ@T*;X}lCnnoMi_ieS?C z7ELsq@E?oD7NY)S%%50_2A0ctAO?>G^hQ1n*LjrQabX3wQV_V;EmLfkcOVuCM~`W^ zXDM}N3AJ2mG*`8@@{>&K5=ze_AskjrGUxzM7kW$4mLyv%?n8S=_a7S8KKr-1=cPzWSMZi* zgSl#ZWvQMJO~LP_rU3nBk@lVG`~Mbl14sUoJVm}w?jv6%w}2IYNB{{S0VIF~kN^@u z0!RP}AOR$R1dzb%O2B0?alF|Iuj@yzedwE4axOEo3t78Kwabs z!Sp^2Z{R$$ga0vhCFeAlIIG#3weqUfa>D}`b585Oa^+k%XFY0NCq5|JEh&r1)F@Q* z|H5^X_q+bl)#*I!n703${SrIRowolo6#ewZT1!=TH-BxI4Q2!)sRWD%Cj1i2g|ShR zL=h*qZm6S(z~+pbc5o^?znnkvmK_#pQ^x%E9vUA$FgjRdG^nTpaY(M{fuSd2$FUhT!Shz z(gj2GEk{NtcJDhh;XbhM$mr0icdoZo)z z)<7)zVdsiKDgx8HxgpPNY0H(Pl)6D_!%1zVahY9#nfY!_YMEKpye3l3i+0Xc+itc=5jLZggtbDj zD1wgWt3j9^M_}DX3KUx7nn)-U?z)8@g>}~kYc|+I9T-;!TH#q5i#k=ZH?w@HC0b*t z>TBlHb~aa-nVo`g33?HM6*O3#vSM!QGj*%X-xrmqlZU!6n^hZ~FR!S!RPBcZw`&v3 zq{pUA6A{q&TnP1yrM|9M>gNbtn3Zf2ZP`51Ty3qgR5dsAr<6$wU79*ejZU?7UM1HM zsZyHeLd6gBH!zb-ORTm*x#mM@FPEF4X*swCrmSjtxB;;hg=}_FsrPKnC8~;HQ-w5) z5}WDG8+ha`Q_Mt4C$%z#NvtUzC0oyutN#R>2y2;J&F;wQZq|sXQK-ktVoVbyacY;$ z&^cXg8@m^hrmON=IF8@+x<0C22$7F$GFP=!^2&MxAeqq!sz}tfSy#n+f2uUp#Z5J) zC0*7CtwCQNttT4FQ1bZwMGS6vl#Y}QqMQ4#lM8X0Yk==`9%{8IDmkh@|s zlY(@aT1(p805>jZXLhm+gI!FSIpEClB%M=}uB^#`aGUW* zWuIu6$gJ#9(P*0ZMNbPZ#pi$iDGq+_A;TG`yNCO=tDxrRoYKkW@DhcBDw z;@&Jr%gz|Lq>pU0RPETopYkbB44DT_B{cLcV9hSvsnvdx(rbbzG1a|MM?viv<%ceO zfXEg!8&xLiVy&)5d$KXL6$Z@fSfm|pOVz%1KHb7pO3hnlB~-$%I;iWARJ$tBpwqE@ z--6&&3w0+Wq&|Y=G^Y4!Qr&Y|PB;*Y zMBu@V_8$sjYKd>KRC&Akv{Q>V!Fr^u?lYy)_Ov$^!>I|P;inCjt?(B5{=bFX#F76Z zKPOL+Z;`vmtzg9;5T zB>;j+wXzief>E`y1po%YX*63QDrG%@!2ve99DvRLEB*f;a^w%>MR*6`IT!`_3Hjk0 zQb0@u2_OL^fCP{L5+=Qo|7cYeT`aK76);`BQH(SFvk&GA{s^^W6?3H#sJKVm;&ztX+P6{}w<0ssRK{6!N$h;#NrvlurzCB9{R`9hMipkJ;SEcv?u|;R+D%kd zaKcX%cmDETMH9emXJ;!h*4AsIS*3|aj#h8f`q3n^%C_MRD`st}JXEhQ>9ek+&)Skc zH6?wjOZrrm^jTBVr?RBa>XJScrF`a`xq7H=Wm=G}qGej1d049)lv~y9VN13?id=SU z=xSG~yi}K$@&P@pURBDox~!xRDdjWgc%zHG+D_EA9WJ&C+O)%Kl&ft;U9H7jtIefK zQ*A2gV=U!UA!yTp%l74$<>=`e!>UX!&orlZjw`pI!;ye{Rt|*Dk> zLFxo=qsL?6U|~S&ISZRSE_YQ#0tx6#>_vRH`Ci(R8D5vdw?uM#X%^xi{PV1vw zqj-abY$Lb0K1xikD_oD0;M?}!+h%RI z*sZq5Z6?QN>&L9`wl-KFx4lFBwRnSg|4QyOD1!u$01`j~NB{{S0X>1vCMPEfywPej zn$5i7SNGg&=00!xO=Rr8o8a%0up;u&g~ylfSC21qXWN=-2vJZ%hAQhw$r~zq2XM5e6JhFhs=sSO^3zwfD z4S&|w%b7XO)1V2h5NsGa8X3Fy-{J2mNbQlwFRGnak5EF-hWr3V&xhWlOHR!m93gsTr}efap1a3+L{N*}J>A14@9eASO+}Ib0jD6!2{GEpNxVId>=Eg?# z2*vQMp<%LW&28Rq{^567rb+~9tI`#reBt1^q+6>&+K`Rjc+RM~wmMzh8_jJBp4=6n z(-5v}HO)MqEpXMh%&tA=w(`G(!uG-6Rw(F)zis@*Q6vAs)UQA`D=&OzqLRuBdw&DX1{|ZCVsLG_SU@Fb%C*)1 zu6O9dQ8{fm8vd;F2F}VVxos7VAPWvsK36_H4o!0GVl+1F7Iev}*@Gica6QVIxXv<7 z>6A;@ct_2B+F&{wpe^?NbjV?C+s%`TJ`iW^(5>t-I*)j%ypC zMTg_v+lHat6!HTax$njIXah1qZ9p2qS#pfEOJ){a)ji0hIrcYy2chqPzdLC%_q8=X zq#mJQ-9}Bpat%fv3?#9}++UNB{{S0VIF~kN^@u0!RP}Ab~d-0nsjUu;5OF)k(JO znN>a8Y!*4Ir`75doAcJO$d9`S7zIgMg(5Po zFwZMmwy7^x(*;>9TjRx>F2LHdHe6hPVH9hx6)r8&GuADN_3FhyFPx58JmgUw*y3c> zQC*Cc@MPt3PC+F)X;c(9s-d%DszZtBk~32DSfXPlIUC6a$;GVk)m=DZ-NhTMm*0FL zv;`u z$x+el+WDokzk}js+jF|?v;%gwO-YeNG8GKRSktQJy11{ZC+GG}kJ1~7xIgVl*NK+e zY?L$bkrH`9o}#;H%i3l5%S&WFt+3>wU%!SAF~bhNo=A9FP9&EivIo9`GDp7{vvVf0 zLyoqO3@9u2izW36Bf5%RX!`>PMa%l-bGf|43UZhg(WJd@`IwiN$VOb6PF5ssVg(;P zE{SH6jrvt-Od#Re8Jyn%TZ5+-dwcvrd8(B56iV9STpiWf5Q*zEZPQeT63t*% zPSxpzCp`r%13Um_S_bEZ*djPT&L*7{p!ybm!-dkqpQ>r(b0zSFlJ6LPz?XQFBcQx{Cw^8@}mUGRXopLxRq>MTm)bOlbdi&`iKyA)} zEqOKq#6^C4e~W0c3o0^wr=D?#clz>N^;UX|5G^zwqwQk*y_GjZifVMZemxymP@~Ip z)mzyehec;F?|k`W>y<^Lqk{&8(J|Ltbks`s|EnQ$%Km@9CeM>+$W!EB;Y$GDB@dJP z$v5D80AD3vCZ8iWlbgs#V2!|g$tm(DWFCUx4+$UvB!C2v01`j~NB{{S0VIF~kN^@W zL4a<;*KDU|6Ehoa^t{1J&3ciV8=2{5W*sv(Fte6z=C_`i>zGN{#(pklI+^KUrk!o| zXJcFaSp^!W*@1(HRLK0VIF~kN^@u0!RP} zAOR$R1dsp{_#+aiHkkQVo_Ct_-e@oeH}hueIQT5L`rjC&^Zy3&TRQ*$+dpDaFli)! z1dsp{Kmter2_OL^fCP{L5c01?1**RwS*?teF_(K9n00|%g zB!C2v01`j~ZyEx-O=galwp&-LTWFQoTv4-&tWGkw-Py1aVj3H>R_goXf;n)ChY`RJhU5S)jOj12TF4f;YO!#>~Su9=WL9tw>P zEc$#S1Cx`xkB)*|NGo@VD%Tkr9vwdsSlBxfTab>*F<)qK!58v1C1R;~Kn@3YDBE~8 zh2Qp(D+GTVh8i`54l}Vj>H*EIKpOMZ;R)-p*^b$l=s8lW|u*grlQ^6g(5f~w7E+oErgY1vpt<1}`rbs9UN_y7 zYh)-fxeyws>n;b`N;pOKz@U$=atjTO43AD8T^OEB`bH;vL!(Q>`$NNY1)XnkW^iP5 z>%hpK{evU>VuRx|!=nRq^_p*da{o;DQs04ar*G!yao^EVh>Na*D;n4CVsX)xZ+nmW zI;Eon$Hym*z#7ajtic>ExdyWakuhWwqf0AAR*lFgvVE@rS=Ffy;~D>3@4^gir59$3 zUYL0o@<_3Zk6p-gp>+O*oE#k(9NOPA-bEKi4||XMMn@CA(Sedz<(dvb($gwjXRcYX zUfCdwEhoPEl+k#`u9tfIO*mH^Hl{sU2+Df&1$iowjK$>+&AlQZCFM{ozT}Y?lX5f> zjztrmNO)RKB$pzxCovP7n+r!nJ7*$0NJf*B^QnV*JmtJdm|QlPvIyRB z%40l}cWD&SVqCASzGGompQH*uJ8#o?X=E+MvPijDH^Il4JK?9M$Vmpf!@eNR_$Y|rU}Ttg#&qMot1-OsdX+QEy?lBD>(Z1 zCQ8T@eYPA}P~N1VZ&3_{GH+2#tmu6mc=v|BF+tzaS^f!o<2fFc-D=IKHJp3Gt~*Dt z4Vm5z566p~?{jW+{K_%t_=u?p~Lt}AHSDDyY7XToF(g{At8H1=RC9H;hWBU z;#6A&=#YJ|ptMDv(DGV7t@T29we=jD!78@t>W2reF1K=NkM4>)Zyl1snRskgb{~X#LfVTUU)b}CI&4a3W_VtT46B{djuYXz+)eV?Pk#03FWvU^r@#F4EuS*O#dyo% zkG=fZv{U4cAJWwsyzC24*@=`CNhDLjuri(+j1_jshGK!#tQ<{BN$B{pX5%mm2D(JU zxd$O8Nauv`nTg7fL*yo7D@jLrnpYb*cVU!JqH!tE=H5fQhHdU6;V3-ICs;z!O|es# z+o5%l#G_c!{>f>*U6vVb^Qx>zyD|V-2C&~zw-2_KKpF?&uJ9x z*FX0BJIo@NYxZ>g#TA5xYrV6`^FIDFEJbqK5Sw=AC)(yt&dBa580m8Fg9#b;LAH5=nxnjI2v458%3$S2 zR*;MHp7WaWWGpoS%Q%(6h;43I#>shN z;pEzhWczxN+g_6&gJw;qGNVVuFFj(oyLBCmt5oHS=O&>i>4Hxx+<5<^ZS8B9iJOj8 z_(nl|=C+xh&Qp^$Fe)Iwnt6z#9sTUSniu$5&}Akq9$l<|&ziT6N@Gru01`j~NB{{S z0VIF~kN^@u0!RP}AOR$R1dsp{Kmter2_OL^fCP{L50: + if attempt > 0: time.sleep(5) browser = None with sync_playwright() as p: try: browser = p.chromium.connect_over_cdp(remote_debugging_url) - #break + # break except Exception as e: if attempt < 14: logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}") - #time.sleep(10) + # time.sleep(10) continue else: logger.error(f"Failed to connect after multiple attempts: {e}") @@ -379,7 +385,7 @@ class SetupController: try: page.goto(url, timeout=60000) except: - logger.warning("Opening %s exceeds time limit", url) # only for human test + logger.warning("Opening %s exceeds time limit", url) # only for human test logger.info(f"Opened tab {i + 1}: {url}") if i == 0: @@ -458,16 +464,17 @@ class SetupController: for p in paths: q = f'"{parent_id}" in parents and title = "{p}" and mimeType = "application/vnd.google-apps.folder" and trashed = false' folder = drive.ListFile({'q': q}).GetList() - if len(folder) == 0: # not exists, create it + if len(folder) == 0: # not exists, create it parents = {} if parent_id == 'root' else {'parents': [{'id': parent_id}]} - file = drive.CreateFile({'title': p, 'mimeType':'application/vnd.google-apps.folder', **parents}) + file = drive.CreateFile({'title': p, 'mimeType': 'application/vnd.google-apps.folder', **parents}) file.Upload() parent_id = file['id'] - else: parent_id = folder[0]['id'] + else: + parent_id = folder[0]['id'] return parent_id for oid, operation in enumerate(config['operation']): - if operation == 'delete': # delete a specific file + if operation == 'delete': # delete a specific file # query pattern string, by default, remove all files/folders not in the trash to the trash params = config['args'][oid] q = params.get('query', '') @@ -476,15 +483,19 @@ class SetupController: filelist: GoogleDriveFileList = drive.ListFile({'q': q_file}).GetList() q_folder = f"( {q} ) and mimeType = 'application/vnd.google-apps.folder'" if q.strip() else "mimeType = 'application/vnd.google-apps.folder'" folderlist: GoogleDriveFileList = drive.ListFile({'q': q_folder}).GetList() - for file in filelist: # first delete file, then folder + for file in filelist: # first delete file, then folder file: GoogleDriveFile - if trash: file.Trash() - else: file.Delete() + if trash: + file.Trash() + else: + file.Delete() for folder in folderlist: folder: GoogleDriveFile # note that, if a folder is trashed/deleted, all files and folders in it will be trashed/deleted - if trash: folder.Trash() - else: folder.Delete() + if trash: + folder.Trash() + else: + folder.Delete() elif operation == 'mkdirs': params = config['args'][oid] mkdir_in_googledrive(params['path']) @@ -508,7 +519,6 @@ class SetupController: else: raise ValueError('[ERROR]: not implemented clean type!') - def _login_setup(self, **config): """ Login to a website with account and password information. @args: @@ -537,7 +547,7 @@ class SetupController: raise e if not browser: return - + context = browser.contexts[0] platform = config['platform'] @@ -565,3 +575,82 @@ class SetupController: raise NotImplementedError return browser, context + + def _update_browse_history_setup(self, **config): + db_path = os.path.join("desktop_env", "assets", "history_empty.sqlite") + + # copy a new history file in the tmp folder + cache_path = os.path.join(self.cache_dir, "history_new.sqlite") + shutil.copyfile(db_path, cache_path) + db_path = cache_path + + history = config['history'] + + for history_item in history: + url = history_item['url'] + title = history_item['title'] + visit_time = datetime.now() - timedelta(seconds=history_item['visit_time_from_now_in_seconds']) + + # Chrome use ms from 1601-01-01 as timestamp + epoch_start = datetime(1601, 1, 1) + chrome_timestamp = int((visit_time - epoch_start).total_seconds() * 1000000) + + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + cursor.execute(''' + INSERT INTO urls (url, title, visit_count, typed_count, last_visit_time, hidden) + VALUES (?, ?, ?, ?, ?, ?) + ''', (url, title, 1, 0, chrome_timestamp, 0)) + + url_id = cursor.lastrowid + + cursor.execute(''' + INSERT INTO visits (url, visit_time, from_visit, transition, segment_id, visit_duration) + VALUES (?, ?, ?, ?, ?, ?) + ''', (url_id, chrome_timestamp, 0, 805306368, 0, 0)) + + conn.commit() + conn.close() + + logger.info('Fake browsing history added successfully.') + + controller = PythonController(self.vm_ip) + + # get the path of the history file according to the platform + os_type = controller.get_vm_platform() + + if os_type == 'Windows': + chrome_history_path = controller.execute_python_command( + """import os; print(os.path.join(os.getenv('USERPROFILE'), "AppData", "Local", "Google", "Chrome", "User Data", "Default", "History"))""")[ + 'output'].strip() + elif os_type == 'Darwin': + chrome_history_path = controller.execute_python_command( + """import os; print(os.path.join(os.getenv('HOME'), "Library", "Application Support", "Google", "Chrome", "Default", "History"))""")[ + 'output'].strip() + elif os_type == 'Linux': + chrome_history_path = controller.execute_python_command( + "import os; print(os.path.join(os.getenv('HOME'), '.config', 'google-chrome', 'Default', 'History'))")[ + 'output'].strip() + else: + raise Exception('Unsupported operating system') + + form = MultipartEncoder({ + "file_path": chrome_history_path, + "file_data": (os.path.basename(chrome_history_path), open(db_path, "rb")) + }) + headers = {"Content-Type": form.content_type} + logger.debug(form.content_type) + + # send request to server to upload file + try: + logger.debug("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/upload") + response = requests.post(self.http_server + "/setup" + "/upload", headers=headers, data=form) + if response.status_code == 200: + logger.info("Command executed successfully: %s", response.text) + else: + logger.error("Failed to upload file. Status code: %s", response.text) + except requests.exceptions.RequestException as e: + logger.error("An error occurred while trying to send the request: %s", e) + + self._execute_setup(["sudo chown -R user:user /home/user/.config/google-chrome/Default/History"], shell=True) diff --git a/desktop_env/evaluators/metrics/slides.py b/desktop_env/evaluators/metrics/slides.py index d3b8542..63e1e39 100644 --- a/desktop_env/evaluators/metrics/slides.py +++ b/desktop_env/evaluators/metrics/slides.py @@ -139,6 +139,7 @@ def compare_pptx_files(file1_path, file2_path, **options): examine_number_of_slides = options.get("examine_number_of_slides", True) examine_shape = options.get("examine_shape", True) examine_text = options.get("examine_text", True) + examine_indent = options.get("examine_indent", True) examine_font_name = options.get("examine_font_name", True) examine_font_size = options.get("examine_font_size", True) examine_font_bold = options.get("examine_font_bold", True) @@ -146,6 +147,7 @@ def compare_pptx_files(file1_path, file2_path, **options): examine_color_rgb = options.get("examine_color_rgb", True) examine_font_underline = options.get("examine_font_underline", True) examine_strike_through = options.get("examine_strike_through", True) + examine_bullets = options.get("examine_bullets", True) # compare the number of slides if len(prs1.slides) != len(prs2.slides) and examine_number_of_slides: @@ -167,6 +169,12 @@ def compare_pptx_files(file1_path, file2_path, **options): # check if the paragraphs are the same for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs): # check if the runs are the same + if para1.text != para2.text and examine_text: + return 0 + + if para1.level != para2.level and examine_indent: + return 0 + for run1, run2 in zip(para1.runs, para2.runs): # check if the font properties are the same @@ -192,7 +200,40 @@ def compare_pptx_files(file1_path, file2_path, **options): 'strike', 'noStrike') and examine_strike_through: return 0 - # fixme: Actually there are more properties to be compared, but we cannot get them through pptx + def _extract_bullets(xml_data): + root = ET.fromstring(xml_data) + + namespaces = { + 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main', + 'p': 'http://schemas.openxmlformats.org/presentationml/2006/main', + } + + bullets = [] + + for paragraph in root.findall('.//a:p', namespaces): + pPr = paragraph.find('a:pPr', namespaces) + if pPr is not None: + lvl = pPr.get('lvl') + buChar = pPr.find('a:buChar', namespaces) + char = buChar.get('char') if buChar is not None else "No Bullet" + buClr = pPr.find('a:buClr/a:srgbClr', namespaces) + color = buClr.get('val') if buClr is not None else "No Color" + else: + lvl = "No Level" + char = "No Bullet" + color = "No Color" + + text = "".join(t.text for t in paragraph.findall('.//a:t', namespaces)) + + bullets.append((lvl, char, text, color)) + + return bullets + + if _extract_bullets(run1.part.blob.decode('utf-8')) != _extract_bullets( + run2.part.blob.decode('utf-8')) and examine_bullets: + return 0 + + # fixme: Actually there are more properties to be compared, we can add them later via parsing the xml data return 1 @@ -414,6 +455,7 @@ if __name__ == '__main__': # r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\550ce7e7-747b-495f-b122-acdc4d0b8e54\New_Club_Spring_2018_Training_Gold.pptx")) # print(evaluate_presentation_fill_to_rgb_distance(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\3b27600c-3668-4abd-8f84-7bcdebbccbdb\lec17-gui-events.pptx", {"rgb": (0, 0, 255)})) # print(check_auto_saving_time(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\2cd43775-7085-45d8-89fa-9e35c0a915cf\registrymodifications.xcu", {"minutes": 3})) - print(compare_pptx_files(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\a669ef01-ded5-4099-9ea9-25e99b569840\Writing-Outlines.pptx", - r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\a669ef01-ded5-4099-9ea9-25e99b569840\Writing-Outlines_Gold.pptx", - examine_shape=False)) + print(compare_pptx_files( + r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\a669ef01-ded5-4099-9ea9-25e99b569840\Writing-Outlines.pptx", + r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\a669ef01-ded5-4099-9ea9-25e99b569840\Writing-Outlines_Gold.pptx", + examine_shape=False)) diff --git a/evaluation_examples/examples/chrome/44ee5668-ecd5-4366-a6ce-c1c9b8d4e938.json b/evaluation_examples/examples/chrome/44ee5668-ecd5-4366-a6ce-c1c9b8d4e938.json index 3a31855..def3f9e 100644 --- a/evaluation_examples/examples/chrome/44ee5668-ecd5-4366-a6ce-c1c9b8d4e938.json +++ b/evaluation_examples/examples/chrome/44ee5668-ecd5-4366-a6ce-c1c9b8d4e938.json @@ -4,6 +4,213 @@ "instruction": "I am looking for an website address I accessed a month ago, but Youtube websites which take almost all of my browsing history are interrupting my search. This is too annoying. I want to remove all my Youtube browsing history first to facilitate my search. Could you help me clear browsing history from Youtube?", "source": "https://superuser.com/questions/1787991/clear-browsing-history-from-specific-site-on-chrome", "config": [ + { + "type": "update_browse_history", + "parameters": { + "history": [ + { + "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "title": "Rick Astley - Never Gonna Give You Up (Official Music Video)", + "visit_time_from_now_in_seconds": 3600 + }, + { + "url": "https://www.youtube.com/watch?v=9bZkp7q19f0", + "title": "PSY - GANGNAM STYLE(강남스타일) M/V", + "visit_time_from_now_in_seconds": 1631 + }, + { + "url": "https://www.youtube.com/watch?v=3tmd-ClpJxA", + "title": "Maroon 5 - Sugar (Official Music Video)", + "visit_time_from_now_in_seconds": 900 + }, + { + "url": "https://www.nytimes.com/", + "title": "The New York Times", + "visit_time_from_now_in_seconds": 300 + }, + { + "url": "https://www.youtube.com/watch?v=OPf0YbXqDm0", + "title": "Ed Sheeran - Shape of You [Official Music Video]", + "visit_time_from_now_in_seconds": 1200 + }, + { + "url": "https://www.youtube.com/watch?v=JGwWNGJdvx8", + "title": "Taylor Swift - Shake It Off", + "visit_time_from_now_in_seconds": 2400 + }, + { + "url": "https://www.bbc.co.uk/", + "title": "BBC", + "visit_time_from_now_in_seconds": 1500 + }, + { + "url": "https://www.youtube.com/watch?v=2Vv-BfVoq4g", + "title": "Adele - Hello", + "visit_time_from_now_in_seconds": 1800 + }, + { + "url": "https://www.youtube.com/watch?v=YQHsXMglC9A", + "title": "Katy Perry - Roar (Official Music Video)", + "visit_time_from_now_in_seconds": 2100 + }, + { + "url": "https://www.cnn.com/", + "title": "CNN", + "visit_time_from_now_in_seconds": 2700 + }, + { + "url": "https://www.youtube.com/watch?v=ru0K8uYEZWw", + "title": "Justin Bieber - Baby ft. Ludacris (Official Music Video)", + "visit_time_from_now_in_seconds": 3200 + }, + { + "url": "https://www.youtube.com/watch?v=9bZkp7q19f0", + "title": "PSY - GANGNAM STYLE(강남스타일) M/V", + "visit_time_from_now_in_seconds": 3700 + }, + { + "url": "https://www.nationalgeographic.com/", + "title": "National Geographic", + "visit_time_from_now_in_seconds": 4000 + }, + { + "url": "https://www.youtube.com/watch?v=OPf0YbXqDm0", + "title": "Ed Sheeran - Shape of You [Official Music Video]", + "visit_time_from_now_in_seconds": 4300 + }, + { + "url": "https://www.youtube.com/watch?v=JGwWNGJdvx8", + "title": "Taylor Swift - Shake It Off", + "visit_time_from_now_in_seconds": 4700 + }, + { + "url": "https://www.bbc.co.uk/", + "title": "BBC", + "visit_time_from_now_in_seconds": 5000 + }, + { + "url": "https://www.youtube.com/watch?v=2Vv-BfVoq4g", + "title": "Adele - Hello", + "visit_time_from_now_in_seconds": 5300 + }, + { + "url": "https://www.youtube.com/watch?v=YQHsXMglC9A", + "title": "Katy Perry - Roar (Official Music Video)", + "visit_time_from_now_in_seconds": 5600 + }, + { + "url": "https://www.cnn.com/", + "title": "CNN", + "visit_time_from_now_in_seconds": 5900 + }, + { + "url": "https://www.youtube.com/watch?v=ru0K8uYEZWw", + "title": "Justin Bieber - Baby ft. Ludacris (Official Music Video)", + "visit_time_from_now_in_seconds": 6300 + }, + { + "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "title": "Rick Astley - Never Gonna Give You Up (Official Music Video)", + "visit_time_from_now_in_seconds": 6700 + }, + { + "url": "https://www.nationalgeographic.com/", + "title": "National Geographic", + "visit_time_from_now_in_seconds": 7000 + }, + { + "url": "https://www.youtube.com/watch?v=OPf0YbXqDm0", + "title": "Ed Sheeran - Shape of You [Official Music Video]", + "visit_time_from_now_in_seconds": 7300 + }, + { + "url": "https://www.youtube.com/watch?v=JGwWNGJdvx8", + "title": "Taylor Swift - Shake It Off", + "visit_time_from_now_in_seconds": 7600 + }, + { + "url": "https://www.bbc.co.uk/", + "title": "BBC", + "visit_time_from_now_in_seconds": 7900 + }, + { + "url": "https://www.youtube.com/watch?v=2Vv-BfVoq4g", + "title": "Adele - Hello", + "visit_time_from_now_in_seconds": 8200 + }, + { + "url": "https://www.youtube.com/watch?v=YQHsXMglC9A", + "title": "Katy Perry - Roar (Official Music Video)", + "visit_time_from_now_in_seconds": 8500 + }, + { + "url": "https://www.cnn.com/", + "title": "CNN", + "visit_time_from_now_in_seconds": 8800 + }, + { + "url": "https://www.youtube.com/watch?v=ru0K8uYEZWw", + "title": "Justin Bieber - Baby ft. Ludacris (Official Music Video)", + "visit_time_from_now_in_seconds": 9100 + }, + { + "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "title": "Rick Astley - Never Gonna Give You Up (Official Music Video)", + "visit_time_from_now_in_seconds": 9400 + }, + { + "url": "https://www.nationalgeographic.com/", + "title": "National Geographic", + "visit_time_from_now_in_seconds": 9700 + }, + { + "url": "https://www.youtube.com/watch?v=OPf0YbXqDm0", + "title": "Ed Sheeran - Shape of You [Official Music Video]", + "visit_time_from_now_in_seconds": 10000 + }, + { + "url": "https://www.youtube.com/watch?v=JGwWNGJdvx8", + "title": "Taylor Swift - Shake It Off", + "visit_time_from_now_in_seconds": 10300 + }, + { + "url": "https://www.bbc.co.uk/", + "title": "BBC", + "visit_time_from_now_in_seconds": 10600 + }, + { + "url": "https://www.youtube.com/watch?v=2Vv-BfVoq4g", + "title": "Adele - Hello", + "visit_time_from_now_in_seconds": 10900 + }, + { + "url": "https://www.youtube.com/watch?v=YQHsXMglC9A", + "title": "Katy Perry - Roar (Official Music Video)", + "visit_time_from_now_in_seconds": 11200 + }, + { + "url": "https://www.cnn.com/", + "title": "CNN", + "visit_time_from_now_in_seconds": 11500 + }, + { + "url": "https://www.youtube.com/watch?v=ru0K8uYEZWw", + "title": "Justin Bieber - Baby ft. Ludacris (Official Music Video)", + "visit_time_from_now_in_seconds": 11800 + }, + { + "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "title": "Rick Astley - Never Gonna Give You Up (Official Music Video)", + "visit_time_from_now_in_seconds": 12100 + }, + { + "url": "https://www.nationalgeographic.com/", + "title": "National Geographic", + "visit_time_from_now_in_seconds": 12400 + } + ] + } + }, { "type": "launch", "parameters": { diff --git a/evaluation_examples/examples/libreoffice_impress/358aa0a7-6677-453f-ae35-e440f004c31e.json b/evaluation_examples/examples/libreoffice_impress/358aa0a7-6677-453f-ae35-e440f004c31e.json index 227002d..b594213 100644 --- a/evaluation_examples/examples/libreoffice_impress/358aa0a7-6677-453f-ae35-e440f004c31e.json +++ b/evaluation_examples/examples/libreoffice_impress/358aa0a7-6677-453f-ae35-e440f004c31e.json @@ -54,7 +54,7 @@ { "type": "sleep", "parameters": { - "seconds": 0.5 + "seconds": 15 } } ], diff --git a/evaluation_examples/examples/libreoffice_impress/a669ef01-ded5-4099-9ea9-25e99b569840.json b/evaluation_examples/examples/libreoffice_impress/a669ef01-ded5-4099-9ea9-25e99b569840.json index 853f01a..25c8780 100644 --- a/evaluation_examples/examples/libreoffice_impress/a669ef01-ded5-4099-9ea9-25e99b569840.json +++ b/evaluation_examples/examples/libreoffice_impress/a669ef01-ded5-4099-9ea9-25e99b569840.json @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1C0u-Qvvwa6UbJVTzzQHfdNIgp2i051xA&export=download&authuser=0&confirm=t&uuid=5551a43c-3ff7-424f-b82c-50a5c96b5809&at=APZUnTViShb8pJUviOkmVtn7Pums:1707299959829", + "url": "https://drive.usercontent.google.com/download?id=1hr2flq5iSyMYSps6Jd-3pDOEfZoHFCbb&export=download&authuser=0&confirm=t&uuid=02746987-6ea8-4fbb-8817-8051dab152e7&at=APZUnTWaH071WARB_12CQDvjfg6b:1707314868059", "path": "/home/user/Desktop/Writing-Outlines.pptx" } ] @@ -71,16 +71,13 @@ "func": "compare_pptx_files", "expected": { "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1d4WJwm7KDhDIOJ9r9vKhRTrt_bF3PSln&export=download&authuser=0&confirm=t&uuid=a8ec87de-96cf-49f0-98e4-faa1218354fe&at=APZUnTWiOoiHFjyM4jTunLP4t5wE:1707299961717", + "path": "https://drive.usercontent.google.com/download?id=15mnwwGTDlelIf27C1HdJOiMgfEWNIoLl&export=download&authuser=0&confirm=t&uuid=b53e5fbb-565b-4498-9dc5-071eded307e0&at=APZUnTUdfXCDVFEQPCTYckB-H2Fn:1707314644205", "dest": "Writing-Outlines_Gold.pptx" }, "result": { "type": "vm_file", "path": "/home/user/Desktop/Writing-Outlines.pptx", "dest": "Writing-Outlines.pptx" - }, - "options": { - "examine_shape": false } } } From 3f59ff46dcc263951381d59ca6a708a210a0f875 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 14 Feb 2024 11:59:50 +0800 Subject: [PATCH 2/3] Add infeasible support --- desktop_env/envs/desktop_env.py | 17 +++++++++++++++-- .../b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json | 2 +- .../e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15.json | 2 +- .../fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json | 19 +------------------ 4 files changed, 18 insertions(+), 22 deletions(-) diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index b12fbca..5a8a03f 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -175,10 +175,16 @@ class DesktopEnv(gym.Env): if isinstance(self.evaluator["func"], list) \ else getattr(metrics, self.evaluator["func"]) self.metric_conj: str = self.evaluator.get("conj", "and") # take conjunction of multiple metrics - self.result_getter: Getter = [getattr(getters, "get_{:}".format(res["type"])) for res in + if "result" in self.evaluator: + self.result_getter: Getter = [getattr(getters, "get_{:}".format(res["type"])) for res in self.evaluator["result"]] \ if isinstance(self.evaluator["result"], list) \ else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) + else: + self.result_getter = [None] * len(self.metric) \ + if isinstance(self.metric, list) \ + else None + if "expected" in self.evaluator: self.expected_getter: Getter = [getattr(getters, "get_{:}".format(exp["type"])) if exp else None for exp in self.evaluator["expected"]] \ @@ -293,6 +299,12 @@ class DesktopEnv(gym.Env): self.setup_controller.setup(self.evaluator.get("postconfig", [])) + if self.metric == "infeasible": + if self.action_history[-1] == "FAIL": + return 1 + else: + return 0 + if type(self.metric) == list: results = [] for idx, metric in enumerate(self.metric): @@ -315,7 +327,8 @@ class DesktopEnv(gym.Env): return 0 elif self.metric_conj == 'or' and float(metric) == 1.0: return 1 - else: results.append(metric) + else: + results.append(metric) return sum(results) / len(results) if self.metric_conj == 'and' else max(results) else: try: diff --git a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json index be03893..d7bb963 100644 --- a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json +++ b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json @@ -9,7 +9,7 @@ "os" ], "evaluator": { - "func": "", + "func": "infeasible", "result": { }, "expected": { diff --git a/evaluation_examples/examples/os/e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15.json b/evaluation_examples/examples/os/e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15.json index fdf0ede..70cb9e7 100644 --- a/evaluation_examples/examples/os/e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15.json +++ b/evaluation_examples/examples/os/e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15.json @@ -8,7 +8,7 @@ "os" ], "evaluator": { - "func": "", + "func": "infeasible", "result": { }, "expected": { diff --git a/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json b/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json index 79d8618..b20f794 100644 --- a/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json +++ b/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json @@ -5,32 +5,15 @@ "source": "https://help.ubuntu.com/lts/ubuntu-help/power-percentage.html.en", "trajectory": "trajectories/", "config": [ - { - "type": "execute", - "parameters": { - "command": [ - "python", - "-c", - "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5);" - ] - } - } ], "related_apps": [ "os" ], "evaluator": { - "func": "exact_match", + "func": "infeasible", "result": { - "type": "vm_command_line", - "command": "gsettings get org.gnome.desktop.interface show-battery-percentage", - "shell": true }, "expected": { - "type": "rule", - "rules": { - "expected": "true\n" - } } } } \ No newline at end of file From 8d69eec68fe616ab3b9d76a8da8f7d3b42ece289 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 14 Feb 2024 16:51:07 +0800 Subject: [PATCH 3/3] Update infeasible examples from Chrome and Calc --- .../3720f614-37fd-4d04-8a6b-76f54f8c222d.json | 19 ++++++ .../93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json | 19 ++++++ .../ae78f875-5b98-4907-bbb5-9c737fc68c03.json | 26 +------- .../2bd59342-0664-4ccb-ba87-79379096cc08.json | 66 +------------------ .../7b802dad-6e0f-4204-9815-d4e3f57627d8.json | 19 ++++++ 5 files changed, 60 insertions(+), 89 deletions(-) create mode 100644 evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json create mode 100644 evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json create mode 100644 evaluation_examples/examples/libreoffice_calc/7b802dad-6e0f-4204-9815-d4e3f57627d8.json diff --git a/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json b/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json new file mode 100644 index 0000000..ee0b3c5 --- /dev/null +++ b/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json @@ -0,0 +1,19 @@ +{ + "id": "3720f614-37fd-4d04-8a6b-76f54f8c222d", + "snapshot": "chrome", + "instruction": "I am more familiar with Korean as I am from Korea. I want to use chrome with my mother tongue. Could you help me change the Chrome interface language to Korean? ", + "source": "https://superuser.com/questions/984668/change-interface-language-of-chrome-to-english", + "config": [ + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "infeasible", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json b/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json new file mode 100644 index 0000000..331a46e --- /dev/null +++ b/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json @@ -0,0 +1,19 @@ +{ + "id": "93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9", + "snapshot": "chrome", + "instruction": "Could you assist me in turning off the dark mode feature in Google Chrome? I've noticed that while dark mode is great for reducing glare, it actually makes it more challenging for me to read text clearly, especially with my astigmatism.", + "source": "https://superuser.com/questions/1417973/how-to-disable-google-chrome-dark-mode", + "config": [ + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "infeasible", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json b/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json index 41c0dea..e19fdc5 100644 --- a/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json +++ b/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json @@ -4,40 +4,16 @@ "instruction": "Could you please change the number of search results displayed on one page to 50? I find that having more results visible at once significantly enhances my research efficiency, as it reduces the need to constantly click through multiple pages. ", "source": "https://support.google.com/chrome/thread/219988391/increase-search-results-per-page?hl=en", "config": [ - { - "type": "launch", - "parameters": { - "command": [ - "google-chrome", - "--remote-debugging-port=1337" - ] - } - }, - { - "type": "launch", - "parameters": { - "command": [ - "socat", - "tcp-listen:9222,fork", - "tcp:localhost:1337" - ] - } - } ], "trajectory": "trajectories/", "related_apps": [ "chrome" ], "evaluator": { - "func": "exact_match", + "func": "infeasible", "result": { - "type": "number_of_search_results" }, "expected": { - "type": "rule", - "rules": { - "expected": 50 - } } } } diff --git a/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json b/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json index 6b92cc1..aba58cd 100644 --- a/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json +++ b/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json @@ -4,78 +4,16 @@ "instruction": "Make sparkline chart line by line", "source": "https://www.youtube.com/shorts/L3Z-F1QTQFY", "config": [ - { - "type": "download", - "parameters": { - "files": [ - { - "url": "https://drive.usercontent.google.com/download?id=1uywX5XWMvesnb4-8LPKEzr2HFU7HmoIu&export=download&authuser=0&confirm=t&uuid=267bfe49-a861-4272-ae7c-39c95df35e84&at=APZUnTUbs-FF06hSMv3yWfdXc02l:1703508870351", - "path": "/home/user/OrderId_Month_Chart.xlsx" - } - ] - } - }, - { - "type": "open", - "parameters": { - "path": "/home/user/OrderId_Month_Chart.xlsx" - } - } ], "trajectory": "trajectories/2bd59342-0664-4ccb-ba87-79379096cc08", "related_apps": [ - "libreoffice calc" + "libreoffice_calc" ], "evaluator": { - "postconfig": [ - { - "type": "activate_window", - "parameters": { - "window_name": "OrderId_Month_Chart.xlsx - LibreOffice Calc", - "strict": true - } - }, - { - "type": "sleep", - "parameters": { - "seconds": 0.5 - } - }, - { - "type": "execute", - "parameters": { - "command": [ - "python", - "-c", - "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" - ] - } - } - ], - "func": "compare_table", + "func": "infeasible", "expected": { - "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1KQJJLVPGtTL_7ArEWvwwbFbJSiA3cgSE&export=download&authuser=0&confirm=t&uuid=6b11c721-caad-439a-b369-4c13c7a485df&at=APZUnTV5-1isKrDKSHV9NeJ6TDeS:1703509054094", - "dest": "OrderId_Month_Chart_gold.xlsx" }, "result": { - "type": "vm_file", - "path": "/home/user/OrderId_Month_Chart.xlsx", - "dest": "OrderId_Month_Chart.xlsx" - }, - "options": { - "rules": [ - { - "type": "sparkline", - "sheet_idx0": 0, - "sheet_idx1": "EI0" - }, - { - "type": "sheet_data", - "sheet_idx0": 0, - "sheet_idx1": "EI0" - } - ] } } } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_calc/7b802dad-6e0f-4204-9815-d4e3f57627d8.json b/evaluation_examples/examples/libreoffice_calc/7b802dad-6e0f-4204-9815-d4e3f57627d8.json new file mode 100644 index 0000000..0ebfeaf --- /dev/null +++ b/evaluation_examples/examples/libreoffice_calc/7b802dad-6e0f-4204-9815-d4e3f57627d8.json @@ -0,0 +1,19 @@ +{ + "id": "7b802dad-6e0f-4204-9815-d4e3f57627d8", + "snapshot": "libreoffice_calc", + "instruction": "I would like to sort this table based on cell color, placing all the rows marked with pink at the beginning, while keeping their order among themselves unchanged.", + "source": "https://www.youtube.com/shorts/Of-lzeP1usE", + "config": [ + ], + "trajectory": "trajectories/7b802dad-6e0f-4204-9815-d4e3f57627d8", + "related_apps": [ + "libreoffice_calc" + ], + "evaluator": { + "func": "infeasible", + "expected": { + }, + "result": { + } + } +} \ No newline at end of file