From cb62b3c87798f8acd228bdc41a185e5addd1d9b6 Mon Sep 17 00:00:00 2001 From: adlsdztony Date: Sun, 1 Jun 2025 04:48:50 +0000 Subject: [PATCH] feat&fix: update paths in configuration, enhance error handling, and improve UI elements --- evaluation_examples/test_small.json | 29 +---- monitor/Dockerfile | 5 +- monitor/README.md | 8 +- monitor/docker-compose.yml | 1 + monitor/main.py | 59 +++++++++- monitor/static/favicon.ico | 0 monitor/static/favicon.png | Bin 0 -> 9895 bytes monitor/static/index.css | 40 ++++++- monitor/static/index.js | 41 ++++++- monitor/static/task_detail.css | 177 +++++++++++++++++++++++++--- monitor/templates/index.html | 18 ++- monitor/templates/task_detail.html | 45 ++++++- run_multienv_openaicua.py | 4 + 13 files changed, 359 insertions(+), 68 deletions(-) create mode 100644 monitor/static/favicon.ico create mode 100644 monitor/static/favicon.png diff --git a/evaluation_examples/test_small.json b/evaluation_examples/test_small.json index dbf95d3..3e0f127 100644 --- a/evaluation_examples/test_small.json +++ b/evaluation_examples/test_small.json @@ -29,33 +29,6 @@ "46407397-a7d5-4c6b-92c6-dbe038b1457b", "4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc", "510f64c8-9bcc-4be1-8d30-638705850618", - "897e3b53-5d4d-444b-85cb-2cdc8a97d903", - "c867c42d-a52d-4a24-8ae3-f75d256b5618", - "74d5859f-ed66-4d3e-aa0e-93d7a592ce41", - "b5062e3e-641c-4e3a-907b-ac864d2e7652", - "48d05431-6cd5-4e76-82eb-12b60d823f7d", - "eb303e01-261e-4972-8c07-c9b4e7a4922a", - "d1acdb87-bb67-4f30-84aa-990e56a09c92", - "deec51c9-3b1e-4b9e-993c-4776f20e8bb2", - "8e116af7-7db7-4e35-a68b-b0939c066c78", - "716a6079-22da-47f1-ba73-c9d58f986a38", - "2373b66a-092d-44cb-bfd7-82e86e7a3b4d" - ], - "os": [ - "5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57", - "5812b315-e7bd-4265-b51f-863c02174c28" - ], - "thunderbird": [ - "dfac9ee8-9bc4-4cdc-b465-4a4bfcd2f397", - "15c3b339-88f7-4a86-ab16-e71c58dcb01e" - ], - "vlc": [ - "59f21cfb-0120-4326-b255-a5b827b38967", - "8f080098-ddb1-424c-b438-4e96e5e4786e" - ], - 
"vs_code": [ - "0ed39f63-6049-43d4-ba4d-5fa2fe04a951", - "53ad5833-3455-407b-bbc6-45b4c79ab8fb", - "276cc624-87ea-4f08-ab93-f770e3790175" + "897e3b53-5d4d-444b-85cb-2cdc8a97d903" ] } \ No newline at end of file diff --git a/monitor/Dockerfile b/monitor/Dockerfile index 4692b7f..04311e0 100644 --- a/monitor/Dockerfile +++ b/monitor/Dockerfile @@ -1,14 +1,11 @@ FROM python:3.9-slim -WORKDIR /app +WORKDIR /app/monitor # Install dependencies COPY monitor/requirements.txt ./ RUN pip install --no-cache-dir -r requirements.txt -# Copy application code -COPY monitor/ ./ - # Expose port (will be overridden by environment variable) ARG FLASK_PORT=8080 EXPOSE ${FLASK_PORT} diff --git a/monitor/README.md b/monitor/README.md index b746eb6..e2d640a 100644 --- a/monitor/README.md +++ b/monitor/README.md @@ -28,11 +28,11 @@ The monitor can be configured by editing the `.env` file in the monitor director For example: ```bash # .env -TASK_CONFIG_PATH=evaluation_examples/test_small.json -EXAMPLES_BASE_PATH=evaluation_examples/examples -RESULTS_BASE_PATH=results_operator_aws/pyautogui/screenshot/computer-use-preview +TASK_CONFIG_PATH=../evaluation_examples/test_small.json +EXAMPLES_BASE_PATH=../evaluation_examples/examples +RESULTS_BASE_PATH=../results_operator_aws/pyautogui/screenshot/computer-use-preview MAX_STEPS=50 -FLASK_PORT=8080 +FLASK_PORT=80 FLASK_HOST=0.0.0.0 FLASK_DEBUG=true ``` diff --git a/monitor/docker-compose.yml b/monitor/docker-compose.yml index f5463fa..f7fb056 100644 --- a/monitor/docker-compose.yml +++ b/monitor/docker-compose.yml @@ -8,6 +8,7 @@ services: ports: - "${FLASK_PORT:-8080}:8080" volumes: + - .:/app/monitor - ../evaluation_examples:/app/evaluation_examples - ../results_operator_aws:/app/results_operator_aws env_file: diff --git a/monitor/main.py b/monitor/main.py index 1e1bfec..1d6bd96 100644 --- a/monitor/main.py +++ b/monitor/main.py @@ -15,9 +15,9 @@ load_dotenv() app = Flask(__name__) # Load configuration from environment variables 
-TASK_CONFIG_PATH = os.getenv("TASK_CONFIG_PATH", "evaluation_examples/test_small.json") -EXAMPLES_BASE_PATH = os.getenv("EXAMPLES_BASE_PATH", "evaluation_examples/examples") -RESULTS_BASE_PATH = os.getenv("RESULTS_BASE_PATH", "results_operator_aws/pyautogui/screenshot/computer-use-preview") +TASK_CONFIG_PATH = os.getenv("TASK_CONFIG_PATH", "../evaluation_examples/test_small.json") +EXAMPLES_BASE_PATH = os.getenv("EXAMPLES_BASE_PATH", "../evaluation_examples/examples") +RESULTS_BASE_PATH = os.getenv("RESULTS_BASE_PATH", "../results_operator_aws/pyautogui/screenshot/computer-use-preview") MAX_STEPS = int(os.getenv("MAX_STEPS", "50")) def load_task_list(): @@ -71,11 +71,59 @@ def get_task_status(task_type, task_id): last_step = steps[-1] - # check if the task is done + # Check the log file for agent responses and exit conditions + log_data = { + "agent_responses": [], + "exit_condition": None, + "last_message": None + } + + if os.path.exists(log_file): + try: + with open(log_file, 'r') as f: + log_content = f.readlines() + last_response = None + + for i, line in enumerate(log_content): + # Extract agent responses for each step + if "Responses: [" in line: + response_text = line.split("Responses: [")[1].strip() + if response_text.endswith("]"): + response_text = response_text[:-1] # Remove closing bracket + + # Clean up the response text - remove quotes + if response_text.startswith("'") and response_text.endswith("'"): + response_text = response_text[1:-1] # Remove surrounding quotes + elif response_text == '"]': # Empty response + response_text = "" + + # Handle list of responses + if response_text and "', '" in response_text: + responses = [r.strip("'") for r in response_text.split("', '")] + log_data["agent_responses"].append(responses[0]) # Use first response + last_response = responses[0] # Keep track of the last response + elif response_text: + log_data["agent_responses"].append(response_text) + last_response = response_text # Keep track of the last response + 
+ # Check for exit conditions near the end of the log + if "The state of the agent is not correct" in line or "Exit condition met" in line: + log_data["exit_condition"] = line.strip() + # If this is a message exit, save the last response as the last message + if "message_exit: True" in line and last_response: + log_data["last_message"] = last_response + except Exception as e: + log_data["error"] = f"Error parsing log file: {str(e)}" + + # check if the task is done based on both trajectory and log if last_step.get("done", False): status = "Done" elif last_step.get("Error", False): status = "Error" + elif log_data.get("exit_condition") and "message_exit: True" in log_data.get("exit_condition", ""): + status = "Done (Message Exit)" + elif len(steps) >= MAX_STEPS: + status = "Done (Max Steps)" else: status = "Running" @@ -86,7 +134,7 @@ def get_task_status(task_type, task_id): last_update = "None" result_content = "Task not completed" - if status == "Done": + if status.startswith("Done"): if os.path.exists(result_file): with open(result_file, 'r') as f: result_content = f.read().strip() @@ -99,6 +147,7 @@ def get_task_status(task_type, task_id): "max_steps": MAX_STEPS, "last_update": last_update, "steps": steps, + "log_data": log_data, "result": result_content } diff --git a/monitor/static/favicon.ico b/monitor/static/favicon.ico new file mode 100644 index 0000000..e69de29 diff --git a/monitor/static/favicon.png b/monitor/static/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..7fe40aba6c0eb942f5351c86313cd7ffaff24f3b GIT binary patch literal 9895 zcmeHt=|7Zj*uH&>K`3<7NM$Rs&)79YDJi=wWtXL4vM-G+L(8BV!XQh9VU(|Z-!hD` zCA%<&#+EHxD8IYktLG1RUOw~r%-k=onftot+>Y}&?laNM)QE>ml#79Zf#>5VIK=pbXay^esXde`aw;2wK+d$^|!tH`_Lam;JXatzn%NPTDL+Lp7zL z&^4MGmZqaYlh0SKp}>|uJ$hi&5}O>$*VZJ!!FO^u_Com6afvgWv8!_X5zW=bron;W z%VX(ATGLUKuT;@_%Rl&o!-oFQ{w3pAKW3EswUp&0Bpzs@xi)mIW+&h4`C3_xL~{yU zY`Ap&ZZg}6-|g#b6P0oYlkeG^?W|gCqDx9jBpbta>iQVFi4OHvXP(GC!3pgL$(mVM 
zypg^Bb+5o9Co+gqZO#-mT!OA;YHe*jYr@INsYz*mnN-d+W6CNjDoVb&z<5*TinaC;(ms{P+3fz^%3aZ|nbQa5>(}N+|y-TF9i zIE?eVvPeZ99<{R89H>9vn413yL)O7&omSMBn0(p0gd%>TY0lmPebp-ODr&_`8M&^H z$Auk9uJRGXx1M8T^od)uO!hmo5l0)*zxM~3e{IqZ=`I~-2%lut#BN=-$NoNZ?H*%* zi1olXB8fVwZ-y4ezOH2*hre`!B- ze|$f5aW6tT=xpzL*M-f!AT!2EhOd7sJz!g+@)GrHOiC-o0|V2)D#H+{Gub&!|NXkJ zWk95^0ZPE09MV2=M$2AO_s3ieYxv;U)UUGon7!=FP2W$UD^~C^WLYW@$}`+mRG+Xt zA&V!%lU}O|6?#AVskqsnU1oSZdaj-YbIlJYw3l2Lqsh%9hDjqE?~ECX@6r0x}K`i|qzlu@ApbcK`uWdgoWA6cVebT=@a+o&xL zHRfdbBGS@ui#yG-SiD!Zo#(c~4m0p;!<-7xhRbin#o%KYBw)m|0x!;Vve2ye`boLw zT0_GQH1SX9fp4X1$@Ojeeg3U?tS%7xQVQEX(<3B;EaqLp4Rwm`GV4s_lA7uF?Kp{{ zc_$uBNhhylA(%;(7FI=TxpB19du+7kwXB$Xo&BWEZz%DyG~Rupo*vUzVw=93&<-37 zisJNkTdPFM0~S)>vj`uV{x1h#mA7J|rk#!p6N7hX&+F%b&S-mctCuA*JrAT==w!=n9iX-hxppgRhrB+Qo&v|`Nnw1k?# z)<6^E3ewB0d}SoXAB@;dldu=~m&YRTGu7Dan0Z0@YJhpPm6wT)&%<`OxX)?{k5Inn zW?^;b$ABj3h6q+A;?CQraja6GKg|mEsSO93o!fiF`#pW+!YI`E1r>RjFt3BT4n3WY zcw@K4;c9qw_)8N%g2ZALc++h7f_2mvSdI;mHxm2G_$^& zdel<{4ah^tGu6H&i_!38)u#wH*LfdS zzG1ju?Y(H)NWBO3i4o=Mu_AZk$Im<4JL0Sc{@9=#X1FS2BWu7%6h74i%s?jW?Q&jf zeu+T@J}Y60ZAP>I^|_?SB9K9ekA@g$YVI8YvmhSQ&0+iPEhdZ_Dxv%a41=as@hkfu zZlo`U?_HXNU*COy>h@mQO!Kr08b%#}8JRcQ|LqbB7>0BA6qB#kq*}taZsOGG`xq{g zlQQpYzsTXH<29}F6@@UN(7AHv?`)VLOJWc;_igFdHi1bjz>i2-_0Vw_tA5+#4-Zxh5`eu z*fKuzS22aCzVe4&54cVV!IUvb?2~!vK8e{7%}51DlkYL3LvzWt!L9jK0;kqjlz;<0 zQ{sbpVAN9-ofVJh1*)-TnRF$-|l_~PPcX#3$YMGN98 z#cKmeC>Qu7wqpm&5TXybLog_dw5u)rAKP+4XG%`;Kr)K*%Ad9EtEi$vp3-n{Y#NYZ zryVsKPk2@0{7yF24*=MeC-s82|Dfb_l3-6iIV@pGALV#y>N!50x1aoHoLzcAjk$4s z;Synrxi%A3*kcr>_<=+FllY!;}2wGr@aiA zj^M?76_e8IQnkk;C{{0ncHtHOSq6A5NF>jwO$kB%#aa}?{oV2GlCVyI8di#RgozZp z#dc&AQU$>R5tc5IrQp%6oPu*65X5w_Nm$gX=0#*8d!_6c@&BAod$7U(3hr#1niEM+n3^R%{HA zC{!j8P8QAp!93Ym#+VPZH8ATRqhXw_xxv`8hORVM-0|o)k(VAxj8{*a&rB~X;Y{3y zIqh`v{2{W0AWOk6^V&0R^bI_mf=6@%{${gdaUPa*KBN}Xhq+VS0TT`(>X+@=TQ5xS zQvz0DBRg5)6-7%~@bLLS%Wo{(T$pH-83sT5j&1hxHed8q3S5JY)S^G6=sBx0h1x3- z#~8#*QW{H^WnhcqKkw)*GCiEIo6kZsVO?Z_dB6_#ru;^{{E=)1q2(|g`|CWr%c 
zZy4?hW0OU4pSi-*zMdhg+X=}`6nw7cAcC{hPKR&109VihaX8gcNn&{yn7n8N3?q(B zd?@fq(hbKCRIl=WQP_)2JhLaCme#uxw?gdikzKj6T5HDZO~50~ek7wX(gdxR!SF)f zKE z=Q6&V%$qi*pucZcZWV{Uh?1dvc6HFinV5`xpXEzO(1Gj^0lj3|QWOVlZAqO~^6*yc zzJ&4-TVe{zEGJ)zxpvTo4S+sQsYphavSA12R4$oo&fC%78Df-xaVG4aH6(sp3_D3{ zhvI3?yYi&K@TQjG`m|Zn=jR(a^{4Jfj*Ce0)Gv6SmRFtItj?Q%X_{A}#62&L&+j*T zk662m8hA0xS6LDOBRQCY3G|{4#dU24&dST39sQbAr~dl9wHkF&&rYS^!3|RJmy%KYE!;OKy8btAslt)c);?)gnbaaL@tgp2N8O@$sGA; zraQ!QfV&{jyf*Mt1HbM-aF-;C=_D4-7AwDYGg38GNXDD6-Ip5|b4u)4IhhfoZ)+F{ z$uGa*4|!j*#X@TK<-ImgHJ)76yDN7-kmqv9aKiI63-Gc;Hkw5AY`>gnZa__?E zh}cBX1)aO;++kWuNR6-c9NkndW*F0_lCOXJ#*POnncELM$ zvR`aANyaRxBfi2i8ZlAM@byQWP4c>~l$US)i?kUB=4wbNJ9Pm3 zS&=<6%)oZxF}AxLG`#q#GH23u=ChfXZ(m+IJL;E$I;x*+YxgN|TK#?T9Y5be**F_t z9n#MCP~Yll!{GHyxM!_KY3WzCfV8412n<6nCocg^5U*d(#+hmN^|OzmxwC)UG6`Ua z5rasPT=T`Kgjdf*qqi)GXj-2P-+z{Zot1t5PLH&K*PF)8G9f-KDq3(?Hb-G)-f_q2 zV3YXvcsJasllMRXifX9iUN8106oUP-8FQ_zttwRUWTe13JvX7iuH$eg4ugMDS6lKG zhVlL}(E8a;qM-M=E3Dv_lioyy;D4w;n)N$d_XrE=vJm#E8K#EKvAYqt zJIf3)I?{#Sfs_6G#yv*%@*_P`RV=ZN0*#?DXv8{@@8yd#u{G?|zokd_!SzSmr@;m3 zNDN{^n8^zvt<$TuV7S$HwrNXsV>C%Ex4CXSarT{1t#fzXB1%}5!RvQIcusp`$;rOW z7BJeNx7K$vmt$RG=fFZ>Ep8vHt6k3WNV^sYwIyf!PjD_)jms$28>wjqI(pG>Pn_Zl zCX&QA&=f@ zk$paeDh3HiuFkQ;CMpm+M)D2cEu3?Heo1?B7~b(|T$C@2KV0tl#=z#wtqIvL@R9pn zt;6pq{!(sO=8Ng=B_&Fau^0T@aN-cJKR1k5+Besh&gc49>kh4sbnvRxXkdZ{C2$r8 z<;6MS(BM={(4_OdaB(vbsTHZg5+TR6R~q&(##2}JYM7JH8nXCdrbhKYYeToM23wzU z-cG3$_P1%tb%$sJ{^T}9WDZSJ{+1rs{I&FU0l>me25gy=M-27sPA4|M|G2LK5&PS; zyW6wB(f)EP3c}wdmX0VP53>inL?uArJMcozMG7EJE>^tpBjgJoR;WBCV*3V*PDG@t zA3n!5oX*F*^^zfWI&!`A55i+)2={z^pMH7_gDn?-mo^-OM%YzzeVrH% zp}Ce1OepjZeOP@gZ*83wvlTz-gM(oGBRRO=a~kvLk%2aNp~(KXf%Jf8+IULq)*E@& zfE4(2NQBIE=(5Spkc$>Upw;7PH0yJld%A|yA@%&teok-yE5oWahYMfsZJ7A-ChYQ@ zA|89n9Ee|$CUDR0H~aM%O=Y4FitMdnTAG*&`5W;FyE8mqKShref5*Y3+<4&U4R#P{D~cI=qe3T)55S-?n-5HZt+TQNmKFd*0cuY_oO>su5fE|rE2-~q$uBIV6LBQJ6#-mkER+45yIvanv8VAUr?{e z613nLB^SeJdnYSS5H*bz6_cGus7c*_PpmmT;kkJjk=ZcV&4}?5K)&kP;=c@le9{yS 
zXK$6jBTkTFXNo{C^7ZAmDgG%tb7dv}md*a}pf`e{QRX@Xzbzu3RD~99EV{RDZIxG0 z>^NQRKQz5iGtCUljH?+r;#R=4nY38D_`aWdiEwpcT$UYJ!BME35U<`aV{f(Do%taL z5xFv5P1IM2o2+#U6@*xGJi?Eu7-{y=zD3`>whew!E1zX6(Q%gJ@8K!RvCo;*TS`2a`*}qA2VySF|Kwgqk`G#=JPMlYQyi?`P?@4VNbYYJG_3I-_!FUs-C{6 z9sZ=oJ9kP#U7ny42_bFq_uta19+x$z+nkWO<_}@S2sX0b-L`6YeCbPBGRy4=iARqE zZqmrn_R)wlUEgPg-?Id;14(sln`!j6jaCz|;I(WLU%}mPT9AupZwu0Gv_jcaO()|P zl=q-2FbE+P&@)`RP_4+dtuV&LmtX#{Ik`aZM*7s-JmaZrGFejv-xI;C`U`uoU z{GY6=q3Z7%#uMEMTC4$}o#&Y_Z%gf{E>J1Djg$w(nm)$Fh^Yu;FDT3kIe2$`aK7Nn z({4>Mq*+e8i|vKk0g9DZkE|s9Z9xp!K;O0c`L&flY1l-7Um$=ZQ!2ZD6-cEi-R=E%vSX<{}z<~jsnJSkc;6b zBuLEl<6R=h_(hV%yl=lFUrc7vf?n%WJo>(9+DLog>Cg7Gih?;Tw&y2T{%6?+S23@Z zPmc)`yOD!PCkOJ*AC%wV#Z)}xEwJJKNGyMd;`}BmlFlWIOHlA0Jd-7yXPWWqe|=|7+9p?BsL+eFX6Qwy2IC0KTv9h zqd{5p@vaAxD4!(kTGQ#6<+{H0YxnNp6W?8~u>T&?U8jUw;lU2CA!}H-dIO-NY2Xn- zU1DCxIOWj)kv&&N!x22L52EzPer&2SWQqs-8+l^BscrK!68qCeYn&70flnnttKl_c z!A{6o9p5&6S;lw_7oG8($05`<>UbQ@+QESxe3Gc|>*Aee$a%T%>VP{{)P$M=H?V!* z4M6A#Pb-L;)G@hseY7$Erf?;xxizwP%iujH?4i7)FamkW;zNKTbhLf*;dxhj$j~qd z46wb|r8XPsihy8OCDP}a7TZ%7`h)-!Hdj?}|9DsBe24q&;$PsK#+xpRD7^T|0OXZ~Ipx5osC=N0T-tA30RMat0i+e@JKblI z_NTC&^x2MFt#1wiyPrCQS=&%8jL!1_?IICjZnAg0R4QZ$=g4aWK-h~Zzu|ye@s}M% zRy}L0QNqc&6XwfhAF6}2xU2@l29Y^}7HlAdo0PwoB@ghdq{5@lK5I^(fW?@XM9S3G zz!<^ugy>_9cVq~2sz~6uZcIUkJznEg()Rp0mB+b{cTeABWKG})T%?FAR`h`rB6g42 z^v_hf;g$hx`7^5pQI-I8SOh4&!V60RH2#5QsPI>EKDeO(k1q%b;cD+^d!aY)_xgOi_`El7YWLICxvwg88;}^Xhxivm~KzCv-EcE=@BGQQz|8ilt*9FgqpZ@lj zEUk0?q%$%n+!5Gl;vLD&J<}um^TcRWL*9lQOZN%h$ha7d`E9e*B;!FAOn46BnUXA_ z3p`35nBGg&WGnF)!5{!p<&oQ$lnjie=iXch3xuemBBrDNQ6W34FOP!T{4GG`9MO`J z6<|6&>4^Nm)O+ehZO~D#4us>M*AY)9$BK&V|2yp+bSW0PDT6W-FpG2Py~xU)L*+|v z*97j`oGkd{$~~PL({`}d;~2){toD)Go+i2Rxf)DPl0DsmNTTF|o#v0O4r=2#6m0!3 z%y!+ScJ<2}Jy;9H%Qc`=-S^%j4!o=9eu=Ypbiu_BA1hBB30r7$p0RuT96GUB;1{$d z-WBME%jfj`0X(!7O5ym30#bVZDW?#xFV~IbOsVYrMF}#Ce*jb|ATrx3 z1p|V{xkVw)PKspFdLqxx&yRRw+c9VFUc*O+KDV#xdZFai#;_j*(C>vbptnY+V6q7I;P@kOA4FJ zd9l03Y{X|>oh9FjO+8-E7hl|-eoJRSG%$1idD+tI56_B;IRNDQQsy}3A#^hjLW2f| 
zi#>3B1a?vlr615@Ir&Gp77W|LtegkKM@1Q_M^ ztPixM6gOGRe{Ts882JBBj(9+-T-Xn)u!{HG1iOBr-jpwwNE2=-P?_MQxUg>{DtVuq z4ExUgjKUXg1UZ2+E=U0%SY0}*aqQ1z6HmgN5;YbJ zc2>15xiCyk`pfKQfsDkznZGszoMoo9?Bn;6aJ1VP@LzQatP^;bqRV`X<(LJ zuU6zKlMkyMcU4gXO1O=6$i z41?hH%lnbvX4`&iz)@Xbeay%FGD>2&Di!G#RKnfe{)E>D)^ST9!|BDlMQn=%Q|%qw zK#TOxT$+&4^5r+)wnEE}R)JE`Mys6GbWv+MUzxR5&UFR-j<%35@Kh=1Qf5$W=^k6+ z1fMU3#31Ae#r{neagz6s7>5`b*qr~(0<_=}p7(2G1`;O~ob)&9TvgaxRI>TnCQ+V$ zZQXP+b>r9lnCdQ{9dw7}UwoGWK9?!BFoLd|LMxhA4i=TW2s@?KuhJZ_sy)2Rv&PG! zV4=LhZiMLffc%LZkVG#kw8`g`b?IWD%h(}zsYl3~ZTOo7$+hUIFqmJ$-{N-jFBjrY ze^NY>t}4j`O{%_-aihr@q-!td1loV?jb;h1PtA8;L8c33XEc^3M7us`p zgGv($Wl4;p=8nJ%`3rrS*iHe4S*zsJbnB;9OwY?7=IA7^C}Yr)<}3k+t@_o>%yZ0w z2ah;=<@5)NVKwXllCTJ{vG%HZCf@hMAvh|+oMJ6aT%S-0yFO^RI*!kySpB?dl=rsu zBogQ~Dh{dwb*J)(4%0q~PCO(3jmI0IhK@2%J@a4Jjd#~7S=*KqOUOGcn5zC1uIusb zmHmTR$f+s`-U~bqF^DenI{};(U4xQ<9|(K{7Q{%h?55pPI^$sRU~3X0poj0gg6-*^ zk;rI-<~_scspkJIW2#Th-u2#-A1x#nrv)>K55d^ouoUwzUAa0&?lN2fvSzOBl-y=; zqM-4d*pZe>HK6a89)qV9Hn@I=K_3o-fuIey9+M%t9&dmBK!_Bui-K%Ui16hasO$FB z@7|T&c=W(4Dvma?Q`=Rj=Ja^1SD|-%bWm+PWBFQ5+hF4m<-_TLUUrdWzf%^+E1qRv z&Ym>UJ5F2g`!O?o2mUtuZrlfkk|nSBXE)?E-64PW6`^5zqqagZs>xU zAGZ6;(-+U9a@u|tb8nLj74E6L9vSvlJq%^*xGAO!Wf-@}Nue~T6y$c4D$-3zVj?$r&G>mTVROf>7+flvicqQt2WrNAJNCmaTY^g#emd#j!D*OPHRp)NRM{4^jjVbO z#SfgO2|A{Rw1T>8J`{4ZID@vcYp^B*g2tR;8#$Yk=f z>SfmmajO=0u5Z@`F5V0ocR$^m*286!r|0TA?|l7L!IN$>S;M_sbM0G3$H&KSJ!p^Tc|#pIqSybMyi!w9vDn{PJ?gQI pZn1oaB^PHtYWnT*^!YKbyl`7$3d02tZ~%?rx}m87@#_7j{|`}C)E58% literal 0 HcmV?d00001 diff --git a/monitor/static/index.css b/monitor/static/index.css index 87a3a28..1bdb589 100644 --- a/monitor/static/index.css +++ b/monitor/static/index.css @@ -56,6 +56,17 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; } margin-bottom: 10px; display: block; } + +/* Specific colors for different stat cards */ +.stat-card:nth-child(4) i { color: #007bff; } /* Total - Blue */ +.stat-card:nth-child(4):hover { background: linear-gradient(135deg, #f0f7ff, #e6f0fb); } +.stat-card:nth-child(1) i 
{ color: #17a2b8; } /* Active - Cyan */ +.stat-card:nth-child(1):hover { background: linear-gradient(135deg, #e3fafd, #d1f2f6); } +.stat-card:nth-child(2) i { color: #28a745; } /* Completed - Green */ +.stat-card:nth-child(2):hover { background: linear-gradient(135deg, #e6f9ea, #d4f7db); } +.stat-card:nth-child(3) i { color: #dc3545; } /* Error - Red */ +.stat-card:nth-child(3):hover { background: linear-gradient(135deg, #feeaec, #fcd8db); } + .stat-card span { font-size: 2em; font-weight: 600; @@ -162,11 +173,12 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; } padding: 20px; transition: all 0.4s cubic-bezier(.4,0,.2,1); opacity: 1; - max-height: 2000px; + max-height: none; + overflow-y: auto; } .task-type.collapsed .tasks-container { - max-height: 0; + max-height: 0 !important; opacity: 0; padding: 0; overflow: hidden; @@ -187,6 +199,9 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; } position: relative; z-index: 2; } +.task-card:last-child { + margin-bottom: 5px; +} .task-card:hover { box-shadow: 0 10px 30px rgba(0,123,255,0.12); transform: translateY(-3px); } .task-header { display: flex; justify-content: space-between; margin-bottom: 14px; align-items: center; } .task-title { font-size: 1.2em; font-weight: 600; color: #1a237e; } @@ -196,6 +211,8 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; } .status-running { background: linear-gradient(135deg, #e3f2fd, #bbdefb); color: #0d47a1; } .status-completed { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; } .status-error { background: linear-gradient(135deg, #ffebee, #ffcdd2); color: #b71c1c; } +.status-unknown { background: linear-gradient(135deg, #e0e0e0, #bdbdbd); color: #424242; } +.status-done-max-steps { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; } .task-details { margin-top: 16px; } .progress-bar { height: 12px; background-color: #eef2f7; border-radius: 6px; margin-top: 10px; overflow: hidden; box-shadow: inset 0 1px 3px 
rgba(0,0,0,0.1); } .progress-fill { height: 100%; background: linear-gradient(90deg, #007bff, #00c6ff); width: 0%; transition: width 0.6s ease; } @@ -302,3 +319,22 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; } color: #0078d7; } +/* Custom scrollbar for tasks container */ +.tasks-container::-webkit-scrollbar { + width: 8px; +} + +.tasks-container::-webkit-scrollbar-track { + background: #f1f5f9; + border-radius: 4px; +} + +.tasks-container::-webkit-scrollbar-thumb { + background: #c0d6e8; + border-radius: 4px; +} + +.tasks-container::-webkit-scrollbar-thumb:hover { + background: #a5c7e5; +} + diff --git a/monitor/static/index.js b/monitor/static/index.js index 6769bb3..a8ef7b5 100644 --- a/monitor/static/index.js +++ b/monitor/static/index.js @@ -5,6 +5,7 @@ document.addEventListener('DOMContentLoaded', () => { document.getElementById('total-tasks').parentElement.addEventListener('click', () => setTaskFilter('all')); document.getElementById('active-tasks').parentElement.addEventListener('click', () => setTaskFilter('active')); document.getElementById('completed-tasks').parentElement.addEventListener('click', () => setTaskFilter('completed')); + document.getElementById('error-tasks').parentElement.addEventListener('click', () => setTaskFilter('error')); }); let allTaskData = null; @@ -49,6 +50,8 @@ function setTaskFilter(filter) { document.getElementById('active-tasks').parentElement.classList.add('selected'); } else if (filter === 'completed') { document.getElementById('completed-tasks').parentElement.classList.add('selected'); + } else if (filter === 'error') { + document.getElementById('error-tasks').parentElement.classList.add('selected'); } } @@ -57,14 +60,17 @@ function updateStatistics(data) { let totalTasks = 0; let activeTasks = 0; let completedTasks = 0; + let errorTasks = 0; Object.entries(data).forEach(([taskType, tasks]) => { totalTasks += tasks.length; tasks.forEach(task => { if (task.status.status === 'Running' || task.status.status === 
'Preparing' || task.status.status === 'Initializing') { activeTasks++; - } else if (task.status.status === 'Done') { + } else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)') { completedTasks++; + } else if (task.status.status === 'Error') { + errorTasks++; } }); }); @@ -72,6 +78,19 @@ function updateStatistics(data) { document.getElementById('total-tasks').textContent = totalTasks; document.getElementById('active-tasks').textContent = activeTasks; document.getElementById('completed-tasks').textContent = completedTasks; + document.getElementById('error-tasks').textContent = errorTasks; + + // Highlight the currently selected stat card + document.querySelectorAll('.stat-card').forEach(card => card.classList.remove('selected')); + if (currentFilter === 'all') { + document.getElementById('total-tasks').parentElement.classList.add('selected'); + } else if (currentFilter === 'active') { + document.getElementById('active-tasks').parentElement.classList.add('selected'); + } else if (currentFilter === 'completed') { + document.getElementById('completed-tasks').parentElement.classList.add('selected'); + } else if (currentFilter === 'error') { + document.getElementById('error-tasks').parentElement.classList.add('selected'); + } } function renderTasks(data) { @@ -86,7 +105,9 @@ function renderTasks(data) { if (currentFilter === 'active') { filteredTasks = tasks.filter(task => ['Running', 'Preparing', 'Initializing'].includes(task.status.status)); } else if (currentFilter === 'completed') { - filteredTasks = tasks.filter(task => task.status.status === 'Done'); + filteredTasks = tasks.filter(task => task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)'); + } else if (currentFilter === 'error') { + filteredTasks = tasks.filter(task => task.status.status === 'Error'); } if (filteredTasks.length > 0) { filteredData[taskType] = filteredTasks; @@ -107,7 +128,7 @@
function renderTasks(data) { tasks.forEach(task => { if (task.status.status === 'Running' || task.status.status === 'Preparing' || task.status.status === 'Initializing') { runningCount++; - } else if (task.status.status === 'Done') { + } else if (task.status.status === 'Done' || task.status.status === 'Done (Message Exit)' || task.status.status === 'Done (Max Steps)') { completedCount++; } else if (task.status.status === 'Error') { errorCount++; @@ -146,6 +167,12 @@ function renderTasks(data) { noTasks.innerHTML = ' No Tasks Available'; tasksContainer.appendChild(noTasks); } else { + // Add scrolling for large task lists + if (tasks.length > 10) { + tasksContainer.style.maxHeight = '600px'; + tasksContainer.style.overflowY = 'auto'; + } + tasks.forEach(task => { const taskCard = document.createElement('div'); taskCard.className = 'task-card'; @@ -178,6 +205,8 @@ function renderTasks(data) { statusIcon = 'fa-running'; break; case 'Done': + case 'Done (Message Exit)': + case 'Done (Max Steps)': statusClass = 'status-completed'; statusIcon = 'fa-check-circle'; break; @@ -185,6 +214,10 @@ function renderTasks(data) { statusClass = 'status-error'; statusIcon = 'fa-exclamation-circle'; break; + default: + statusClass = 'status-unknown'; + statusIcon = 'fa-question-circle'; + break; } taskStatus.classList.add(statusClass); @@ -202,7 +235,7 @@ function renderTasks(data) { if (task.status.progress > 0) { const progressText = document.createElement('div'); - progressText.innerHTML = ` Progress: ${task.status.progress} step(s)`; + progressText.innerHTML = ` Progress: ${task.status.progress}/${task.status.max_steps} step(s)`; taskProgress.appendChild(progressText); const progressBar = document.createElement('div'); diff --git a/monitor/static/task_detail.css b/monitor/static/task_detail.css index c399504..06da1cb 100644 --- a/monitor/static/task_detail.css +++ b/monitor/static/task_detail.css @@ -49,6 +49,11 @@ h2 { color: #0056b3; margin-top: 36px; font-size: 1.6em; 
font-weight: 600; } .step-card { border: none; background: #fafdff; + box-shadow: 0 4px 15px rgba(0,0,0,0.08); + margin-bottom: 25px; + border-radius: 10px; + overflow: hidden; + transition: all 0.3s; padding: 22px 26px; margin-bottom: 24px; border-radius: 10px; @@ -57,19 +62,29 @@ h2 { color: #0056b3; margin-top: 36px; font-size: 1.6em; font-weight: 600; } position: relative; overflow: hidden; } +.step-intent { + padding: 10px 20px; + background: #f0f7ff; + border-left: 4px solid #4285f4; + margin: 10px 20px; + font-size: 0.95em; + line-height: 1.5; + color: #333; +} +.exit-condition { + background: #fff8e1; + padding: 8px 12px; + border-radius: 6px; + font-family: 'Courier New', monospace; + font-size: 0.9em; + border-left: 3px solid #ffa000; +} + .step-card:hover { box-shadow: 0 10px 30px rgba(0,123,255,0.1); transform: translateY(-3px); } -.step-card:before { - content: ''; - position: absolute; - left: 0; - top: 0; - height: 100%; - width: 4px; - background: linear-gradient(to bottom, #007bff, #00c6ff); -} + .step-header { display: flex; justify-content: space-between; margin-bottom: 12px; align-items: center; } .step-title { font-weight: 600; color: #1a237e; font-size: 1.1em; } .step-time { color: #6c757d; font-size: 0.92em; } @@ -90,10 +105,7 @@ pre { box-shadow: 0 5px 15px rgba(0,0,0,0.08); transition: all 0.3s; } -.step-image:hover { - transform: scale(1.01); - box-shadow: 0 8px 25px rgba(0,0,0,0.12); -} + .no-steps { color: #8492a6; font-style: italic; @@ -154,5 +166,142 @@ pre { .status-not-started { background: linear-gradient(135deg, #f0f0f0, #e6e6e6); color: #555; } .status-preparing, .status-initializing { background: linear-gradient(135deg, #fff7e0, #ffe8a3); color: #8a6d00; } .status-running { background: linear-gradient(135deg, #e3f2fd, #bbdefb); color: #0d47a1; } -.status-done { background: linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; } +.status-done, .status-done-message-exit, .status-done-max-steps { background: 
linear-gradient(135deg, #e8f5e9, #c8e6c9); color: #1b5e20; } .status-error { background: linear-gradient(135deg, #ffebee, #ffcdd2); color: #b71c1c; } + +.step-intent { + padding: 10px 20px; + background: #f0f7ff; + border-left: 4px solid #4285f4; + margin: 10px 0; + font-size: 0.95em; + line-height: 1.5; + color: #333; +} + +.exit-condition { + background: #fff8e1; + padding: 8px 12px; + border-radius: 6px; + font-family: 'Courier New', monospace; + font-size: 0.9em; + border-left: 3px solid #ffa000; + position: relative; +} + +.exit-message { + background: #e8f5e9; + padding: 12px 16px; + border-radius: 6px; + font-family: 'Segoe UI', Arial, sans-serif; + font-size: 1em; + border-left: 3px solid #4caf50; + position: relative; + line-height: 1.5; + color: #1b5e20; + margin-top: 4px; + box-shadow: 0 2px 5px rgba(0,0,0,0.05); +} + +.exit-condition-help { + margin-top: 8px; + font-family: 'Segoe UI', Arial, sans-serif; + font-size: 0.85em; + color: #666; + background: #f5f5f5; + padding: 6px 10px; + border-radius: 4px; + border-left: 2px solid #9e9e9e; +} + +/* Tooltip styles */ +.tooltip { + position: relative; + display: inline-block; + margin-left: 8px; + cursor: help; +} + +.tooltip .tooltip-text { + visibility: hidden; + min-width: 200px; + max-width: 500px; + width: max-content; + background-color: #333; + color: #fff; + text-align: left; + border-radius: 6px; + padding: 10px 12px; + position: absolute; + z-index: 10; + bottom: 125%; + left: 50%; + transform: translateX(-50%); + opacity: 0; + transition: opacity 0.3s; + font-weight: normal; + font-size: 0.85em; + white-space: normal; + word-wrap: break-word; + line-height: 1.4; + box-shadow: 0 2px 10px rgba(0,0,0,0.2); +} + +.tooltip .tooltip-text::after { + content: ""; + position: absolute; + top: 100%; + left: 50%; + margin-left: -5px; + border-width: 5px; + border-style: solid; + border-color: #333 transparent transparent transparent; +} + +.tooltip:hover .tooltip-text { + visibility: visible; + opacity: 1; +} + +/*
Tooltip adjustments for mobile devices */ +@media (max-width: 768px) { + .tooltip .tooltip-text { + width: auto; + max-width: 250px; + left: auto; + right: 0; + transform: none; + } + + .tooltip .tooltip-text::after { + left: auto; + right: 10px; + } +} + +/* Progress bar styles */ +.progress-bar { + height: 12px; + background-color: #eef2f7; + border-radius: 6px; + margin: 10px 0; + overflow: hidden; + box-shadow: inset 0 1px 3px rgba(0,0,0,0.1); + width: 100%; + max-width: 300px; +} + +.progress-fill { + height: 100%; + background: linear-gradient(90deg, #007bff, #00c6ff); + width: 0%; + transition: width 0.6s ease; +} + +.progress-percentage { + text-align: right; + font-size: 0.85em; + color: #6c757d; + margin-top: 4px; + font-weight: normal; +} diff --git a/monitor/templates/index.html b/monitor/templates/index.html index ceffbea..4388f64 100644 --- a/monitor/templates/index.html +++ b/monitor/templates/index.html @@ -4,6 +4,9 @@ OSWorld Monitor + + + @@ -12,11 +15,6 @@

OSWorld Monitor System Online

-
- - Loading... -
Total Tasks
-
Loading... @@ -27,6 +25,16 @@ Loading...
Completed
+
+ + Loading... +
Error
+
+
+ + Loading... +
Total Tasks
+
diff --git a/monitor/templates/task_detail.html b/monitor/templates/task_detail.html index fd5fb8f..8fcd26c 100644 --- a/monitor/templates/task_detail.html +++ b/monitor/templates/task_detail.html @@ -4,6 +4,9 @@ Task Detail: {{ task_id }} + + + @@ -21,11 +24,41 @@
Instruction
{{ task_info.instruction }}
Status
-
{{ task_status.status }}
+
+ {{ task_status.status }} + {% if task_status.status == 'Error' %} + + + Error occurred during task execution + + {% elif task_status.status == 'Done (Message Exit)' %} + + + Task completed with a message exit condition + + {% elif task_status.status == 'Done (Max Steps)' %} + + + Maximum steps reached, task completed + + {% endif %} +
Current Step
{{ task_status.progress }}
Last Update
{{ task_status.last_update or 'None' }}
+ {% if task_status.log_data and task_status.log_data.exit_condition %} +
Exit Condition
+
+ {{ task_status.log_data.exit_condition }} +
+ {% endif %} + {% if task_status.status == 'Done (Message Exit)' and task_status.log_data and task_status.log_data.last_message %} +
Exit Message
+
+ {{ task_status.log_data.last_message }} +
+ {% endif %}
Result
{{ task_status.result }}
@@ -40,7 +73,15 @@
Step {{ step.step_num }}
{{ step.action_timestamp }}
-
{{ step.action.action }}
+ {% if task_status.log_data and task_status.log_data.agent_responses and loop.index0 < task_status.log_data.agent_responses|length %} +
+ Agent Intent: {{ task_status.log_data.agent_responses[loop.index0] }} +
+ {% endif %} +
{% if step.action and step.action.action %}{{ step.action.action }}
+                            {% elif step.Error %}Error: {{ step.Error }}
+                            {% else %}{{ step|tojson }}
+                            {% endif %}
{% if step.screenshot_file %}