Merge branch 'main' into zdy

This commit is contained in:
David Chang
2024-01-05 16:14:38 +08:00
18 changed files with 491 additions and 50 deletions

5
.gitignore vendored
View File

@@ -1,3 +1,7 @@
# Model checkpoints
*.pth
*.pt
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/
*.py[cod] *.py[cod]
@@ -166,3 +170,4 @@ tags
tags-opts tags-opts
snapshots snapshots
*.syncthing.*.tmp *.syncthing.*.tmp
cache

View File

@@ -82,7 +82,8 @@ class PythonController:
y = parameters["y"] y = parameters["y"]
if "num_clicks" in parameters: if "num_clicks" in parameters:
num_clicks = parameters["num_clicks"] num_clicks = parameters["num_clicks"]
self.execute_python_command(f"pyautogui.click(button='{button}', x={x}, y={y}, clicks={num_clicks})") self.execute_python_command(
f"pyautogui.click(button='{button}', x={x}, y={y}, clicks={num_clicks})")
else: else:
self.execute_python_command(f"pyautogui.click(button='{button}', x={x}, y={y})") self.execute_python_command(f"pyautogui.click(button='{button}', x={x}, y={y})")
elif "button" in parameters and "x" not in parameters and "y" not in parameters: elif "button" in parameters and "x" not in parameters and "y" not in parameters:
@@ -145,7 +146,8 @@ class PythonController:
if "x" in parameters and "y" in parameters: if "x" in parameters and "y" in parameters:
x = parameters["x"] x = parameters["x"]
y = parameters["y"] y = parameters["y"]
self.execute_python_command(f"pyautogui.dragTo({x}, {y}, duration=1.0, button='left', mouseDownUp=True)") self.execute_python_command(
f"pyautogui.dragTo({x}, {y}, duration=1.0, button='left', mouseDownUp=True)")
elif action_type == "SCROLL": elif action_type == "SCROLL":
# todo: check if it is related to the operating system, as https://github.com/TheDuckAI/DuckTrack/blob/main/ducktrack/playback.py pointed out # todo: check if it is related to the operating system, as https://github.com/TheDuckAI/DuckTrack/blob/main/ducktrack/playback.py pointed out
@@ -208,3 +210,16 @@ class PythonController:
else: else:
raise Exception(f"Unknown action type: {action_type}") raise Exception(f"Unknown action type: {action_type}")
def get_vlc_status(self, host='localhost', port=8080, password='password', timeout=10):
    """
    Fetch the raw status document from VLC's HTTP interface.

    Args:
        host (str): host name or IP address on which VLC's HTTP interface listens
        port (int): port of VLC's HTTP interface
        password (str): password of VLC's HTTP interface (sent with an empty user name)
        timeout (float): seconds to wait for the HTTP request before giving up

    Returns:
        bytes: body of `/requests/status.xml` when the server answers with 200
        None: if the request fails or the server answers with another status code
    """
    url = f'http://{host}:{port}/requests/status.xml'

    # Without a timeout a VLC instance that never answers would block forever.
    try:
        response = requests.get(url, auth=('', password), timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Connection problems are reported the same way as a bad status code:
        # the caller gets None.
        print("Failed to get vlc status. Error:", e)
        return None

    if response.status_code == 200:
        print("VLC status retrieved successfully")
        return response.content

    print("Failed to get vlc status. Status code:", response.status_code)
    return None

View File

@@ -14,12 +14,11 @@ logger = logging.getLogger("desktopenv.setup")
import traceback import traceback
class SetupController: class SetupController:
def __init__( self def __init__(self, http_server: str, cache_dir: str):
, http_server: str self.http_server: str = http_server
, cache_dir: str self.http_server_setup_root = http_server + "/setup"
):
self.http_server = http_server + "/setup"
self.cache_dir: str = cache_dir self.cache_dir: str = cache_dir
def reset_cache_dir(self, cache_dir: str): def reset_cache_dir(self, cache_dir: str):
self.cache_dir = cache_dir self.cache_dir = cache_dir
@@ -52,6 +51,33 @@ class SetupController:
# self._open_setup(config) # self._open_setup(config)
# can add other setup steps # can add other setup steps
# ZDY_COMMENT: merged with launch
#def _command_setup(self, command: str):
#"""
#Directly send a command into the virtual machine os for setting up.
#"""
#payload = json.dumps({"command": command})
#headers = {
#'Content-Type': 'application/json'
#}
#timeout = 5
#timout_whitelist = ["vlc"]
#
#try:
#
#response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout)
#if response.status_code == 200:
#print("Command executed successfully:", response.text)
#else:
#print("Failed to execute command. Status code:", response.status_code)
#except requests.exceptions.Timeout as e:
#if command in timout_whitelist:
#print("Command executed successfully:", command)
#else:
#print("An error occurred while trying to execute the command:", e)
#except requests.exceptions.RequestException as e:
#print("An error occurred while trying to execute the command:", e)
def _download_setup(self, files: List[Dict[str, str]]): def _download_setup(self, files: List[Dict[str, str]]):
""" """
Args: Args:
@@ -70,12 +96,9 @@ class SetupController:
for f in files: for f in files:
url: str = f["url"] url: str = f["url"]
path: str = f["path"] path: str = f["path"]
cache_path: str = os.path.join( self.cache_dir cache_path: str = os.path.join(self.cache_dir, "{:}_{:}".format(
, "{:}_{:}".format( uuid.uuid5(uuid.NAMESPACE_URL, url),
uuid.uuid5(uuid.NAMESPACE_URL, url) os.path.basename(path)))
, os.path.basename(path)
)
)
if not url or not path: if not url or not path:
raise Exception(f"Setup Download - Invalid URL ({url}) or path ({path}).") raise Exception(f"Setup Download - Invalid URL ({url}) or path ({path}).")
@@ -101,22 +124,22 @@ class SetupController:
if not downloaded: if not downloaded:
raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}") raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}")
#payload = json.dumps({"url": url, "path": path}) # payload = json.dumps({"url": url, "path": path})
#headers = { # headers = {
#'Content-Type': 'application/json' # 'Content-Type': 'application/json'
#} # }
form = MultipartEncoder( { "file_path": path form = MultipartEncoder({
, "file_data": (os.path.basename(path), open(cache_path, "rb")) "file_path": path,
} "file_data": (os.path.basename(path), open(cache_path, "rb"))
) })
headers = {"Content-Type": form.content_type} headers = {"Content-Type": form.content_type}
logger.debug(form.content_type) logger.debug(form.content_type)
# send request to server to upload file # send request to server to upload file
try: try:
logger.debug("REQUEST ADDRESS: %s", self.http_server + "/upload") logger.debug("REQUEST ADDRESS: %s", self.http_server_setup_root + "/upload")
response = requests.post(self.http_server + "/upload", headers=headers, data=form) response = requests.post(self.http_server_setup_root + "/upload", headers=headers, data=form)
if response.status_code == 200: if response.status_code == 200:
logger.info("Command executed successfully: %s", response.text) logger.info("Command executed successfully: %s", response.text)
else: else:
@@ -141,7 +164,7 @@ class SetupController:
# send request to server to change wallpaper # send request to server to change wallpaper
try: try:
response = requests.post(self.http_server + "/change_wallpaper", headers=headers, data=payload) response = requests.post(self.http_server_setup_root + "/change_wallpaper", headers=headers, data=payload)
if response.status_code == 200: if response.status_code == 200:
logger.info("Command executed successfully: %s", response.text) logger.info("Command executed successfully: %s", response.text)
else: else:
@@ -168,7 +191,7 @@ class SetupController:
# send request to server to open file # send request to server to open file
try: try:
response = requests.post(self.http_server + "/open_file", headers=headers, data=payload) response = requests.post(self.http_server_setup_root + "/open_file", headers=headers, data=payload)
if response.status_code == 200: if response.status_code == 200:
logger.info("Command executed successfully: %s", response.text) logger.info("Command executed successfully: %s", response.text)
else: else:
@@ -184,7 +207,7 @@ class SetupController:
headers = {"Content-Type": "application/json"} headers = {"Content-Type": "application/json"}
try: try:
response = requests.post(self.http_server + "/launch", headers=headers, data=payload) response = requests.post(self.http_server_setup_root + "/launch", headers=headers, data=payload)
if response.status_code == 200: if response.status_code == 200:
logger.info("Command executed successfully: %s", response.text) logger.info("Command executed successfully: %s", response.text)
else: else:
@@ -200,7 +223,7 @@ class SetupController:
headers = {"Content-Type": "application/json"} headers = {"Content-Type": "application/json"}
try: try:
response = requests.post(self.http_server + "/execute", headers=headers, data=payload) response = requests.post(self.http_server_setup_root + "/execute", headers=headers, data=payload)
if response.status_code == 200: if response.status_code == 200:
results: Dict[str, str] = response.json() results: Dict[str, str] = response.json()
if stdout: if stdout:

View File

@@ -76,7 +76,8 @@ class DesktopEnv(gym.Env):
# Initialize emulator and controller # Initialize emulator and controller
logger.info("Initializing...") logger.info("Initializing...")
self._start_emulator() self._start_emulator()
self.host = f"http://{self._get_vm_ip()}:5000" self.vm_ip = self._get_vm_ip()
self.host = f"http://{self.vm_ip}:5000"
self.controller = PythonController(http_server=self.host) self.controller = PythonController(http_server=self.host)
self.setup_controller = SetupController(http_server=self.host, cache_dir=self.cache_dir) self.setup_controller = SetupController(http_server=self.host, cache_dir=self.cache_dir)

View File

@@ -1,2 +1,3 @@
from .file import get_cloud_file, get_vm_file, get_cache_file from .file import get_cloud_file, get_vm_file, get_cache_file
from .misc import get_rule from .misc import get_rule
from .vlc import get_vlc_playing_info

View File

@@ -3,6 +3,7 @@ from typing import Dict
import os import os
import requests import requests
def get_cloud_file(env, config: Dict[str, str]) -> str: def get_cloud_file(env, config: Dict[str, str]) -> str:
""" """
Config: Config:
@@ -25,6 +26,7 @@ def get_cloud_file(env, config: Dict[str, str]) -> str:
return _path return _path
def get_vm_file(env, config: Dict[str, str]) -> str: def get_vm_file(env, config: Dict[str, str]) -> str:
""" """
Config: Config:

View File

@@ -0,0 +1,20 @@
import os
from typing import Dict
def get_vlc_playing_info(env, config: Dict[str, str]):
    """
    Gets the current playing information from VLC's HTTP interface and stores
    it in the cache directory.

    Config:
        dest (str): file name, relative to `env.cache_dir`, to write the status to
        port (int): optional port of VLC's HTTP interface, defaults to 8080
        password (str): optional password of VLC's HTTP interface, defaults to
          'password'

    Returns:
        str: path of the written status file
        None: if the controller could not retrieve the status
    """
    _path = os.path.join(env.cache_dir, config["dest"])

    host = env.vm_ip
    port = config.get("port", 8080)
    password = config.get("password", 'password')

    content = env.controller.get_vlc_status(host, port, password)
    if content is None:
        # The controller signals failure with None; writing it would raise a
        # TypeError and leave an empty file behind.
        print("Failed to get VLC playing info: no status returned")
        return None

    with open(_path, "wb") as f:
        f.write(content)

    return _path

View File

@@ -5,4 +5,5 @@ from .docs import compare_font_names, compare_subscript_contains, has_page_numbe
from .docs import is_first_line_centered, check_file_exists, compare_contains_image from .docs import is_first_line_centered, check_file_exists, compare_contains_image
from .pdf import check_pdf_pages from .pdf import check_pdf_pages
from .libreoffice import check_libre_locale from .libreoffice import check_libre_locale
#from .vlc import is_vlc_playing
from .general import check_csv from .general import check_csv

View File

@@ -0,0 +1,22 @@
import os
def get_gimp_export_path():
    """
    Read GIMP's default export path from the user's `gimprc`.

    The path of the configuration file assumes GIMP version 2.10; adjust it
    for other GIMP versions or file system layouts.

    Returns:
        str: the first quoted value found on a `default-export-path` line
        None: if the file contains no usable `default-export-path` setting
        False: if the configuration file does not exist
    """
    gimp_config_file = os.path.expanduser("~/.config/GIMP/2.10/gimprc")

    try:
        with open(gimp_config_file, 'r') as file:
            for line in file:
                stripped = line.strip()
                # gimprc uses '#' for comments; a commented-out setting is not
                # the active value.
                if stripped.startswith("#"):
                    continue
                if "default-export-path" not in stripped:
                    continue
                # The value is expected to be enclosed in double quotes; skip
                # lines without a quoted value instead of raising IndexError.
                parts = stripped.split('"')
                if len(parts) > 1:
                    return parts[1]
    except FileNotFoundError:
        # Handle the case where the configuration file is not found
        print("GIMP configuration file not found")
        return False

    return None

View File

@@ -1,14 +1,14 @@
import os import os
import platform import platform
import requests
from xml.etree import ElementTree from xml.etree import ElementTree
import pygetwindow as gw import pygetwindow as gw
import pyautogui import pyautogui
from typing import Dict
import logging import logging
logger = logging.getLogger("desktopenv.metrics.vlc") logger = logging.getLogger("desktopenv.metrics.vlc")
def read_vlc_config(setting_name): def get_vlc_config(setting_name):
""" """
Reads the VLC configuration file to check for a specific setting. Reads the VLC configuration file to check for a specific setting.
@@ -41,24 +41,22 @@ def read_vlc_config(setting_name):
return None return None
def is_vlc_playing(actual: str, rule: Dict[str, str]) -> float:
    """
    Checks if VLC is currently playing the expected file.

    Args:
        actual (str): path to a saved copy of VLC's `status.xml`
        rule (Dict[str, str]): dict like
          {
            "expected": str, suffix the playing file name has to end with
          }
          `file_path` is accepted as a fallback key for `expected`.

    Returns:
        float: 1.0 if VLC is in state "playing" and the playing file matches,
          0.0 otherwise (including when no status file is available)

    Raises:
        KeyError: if VLC is playing a file but the rule names no expected file
    """
    if not actual:
        # No status could be collected, so nothing can be playing.
        return 0.0

    with open(actual, 'rb') as file:
        tree = ElementTree.fromstring(file.read())

    # findtext() yields None for a missing node instead of failing on `.text`.
    if tree.findtext('state') != 'playing':
        return 0.0

    file_info = tree.findtext('information/category[@name="meta"]/info[@name="filename"]')
    print("file_info: ", file_info)
    if not file_info:
        return 0.0

    expected = rule.get('expected') or rule.get('file_path')
    if not expected:
        raise KeyError("expected")

    if file_info.endswith(expected):
        return 1.0

    # NOTE(review): the status document appears to report only the file's base
    # name, while task configs may give a path with directories - confirm
    # against a real status.xml.
    expected_name = os.path.basename(expected)
    if expected_name and file_info.endswith(expected_name):
        return 1.0

    return 0.0
def is_vlc_fullscreen(): def is_vlc_fullscreen():
@@ -86,5 +84,3 @@ def is_vlc_fullscreen():
except Exception as e: except Exception as e:
logger.error(f"An error occurred: {e}") logger.error(f"An error occurred: {e}")
return False return False

View File

@@ -1,12 +1,42 @@
{ {
"id": "59f21cfb-0120-4326-b255-a5b827b38967", "id": "59f21cfb-0120-4326-b255-a5b827b38967",
"snapshot": "base_setup", "snapshot": "base_setup",
"instruction": "Could you help me play the file at FILE_PATH?", "instruction": "Play the music video on my desktop",
"source": "https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/media.html#playing-a-file", "source": "https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/media.html#playing-a-file",
"config": [], "config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=14-vhVMVw53e0l-MDVBFbngFAE1jMqvgm&export=download&authuser=0&confirm=t&uuid=d31607ed-0075-4fe5-b68c-b24b6eec356e&at=APZUnTV0Wy0672VFGrQChgHmd1Ba:1704337791613",
"path": "Desktop/Rick Astley - Never Gonna Give You Up (Official Music Video).mp4"
}
]
}
},
{
"type": "launch",
"parameters": {
"command": "vlc"
}
}
],
"trajectory": "trajectories/", "trajectory": "trajectories/",
"related_apps": [ "related_apps": [
"vlc" "vlc"
], ],
"evaluator": "evaluation_dir" "evaluator": {
"func": "is_vlc_playing",
"expected": {
"type": "rule",
"rules": {
"file_path": "Desktop/Rick Astley - Never Gonna Give You Up (Official Music Video).mp4"
}
},
"result": {
"type": "vlc_playing_info",
"dest": "status.xml"
}
}
} }

View File

View File

@@ -0,0 +1,8 @@
1. Download the Majestic Million site list and save it to `majestic_million.csv`:
```bash
python3 majestic_million.py
```
2. Run the Playwright-based crawler to collect data from those URLs:
```bash
python scrapy_crawler.py
```

View File

@@ -0,0 +1,158 @@
(() => {
    // Overlay boxes created by markPage(); kept so unmarkPage() can remove them.
    let labels = [];

    /**
     * Build a CSS selector path for `el` by walking up through its ancestors.
     * Each step is TAG[#id][.class...]; `:nth-of-type()` is appended only when
     * the element has no id. Not called by markPage(); kept as a helper.
     */
    var generateQuerySelector = function (el) {
        function cssEscape(value) {
            if (!value) return '';
            // Escape all CSS special characters, including the colon.
            return value.replace(/([!"#$%&'()*+,./:;<=>?@[\]^`{|}~])/g, '\\$&');
        }

        function getChildIndex(node) {
            var siblings = Array.from(node.parentNode.children);
            var sameTagSiblings = siblings.filter(sibling => sibling.tagName === node.tagName);
            return sameTagSiblings.indexOf(node);
        }

        if (el.tagName.toLowerCase() === "html") {
            return "HTML";
        }

        var str = el.tagName;
        var idPresent = false; // Set when the element carries an id

        if (el.id !== "") {
            str += "#" + cssEscape(el.id);
            idPresent = true;
        }

        // `className` is not a string on SVG elements, so fall back to the
        // raw attribute there instead of calling split() on an object.
        var classAttr = (typeof el.className === "string")
            ? el.className
            : (el.getAttribute("class") || "");
        if (classAttr) {
            var classes = classAttr.split(/\s+/).filter(Boolean); // Filter out empty strings
            for (var i = 0; i < classes.length; i++) {
                str += "." + cssEscape(classes[i]);
            }
        }

        // A detached element has no parent to index into or recurse on.
        if (!el.parentElement) {
            return str;
        }

        // Only add :nth-of-type() if no ID is present
        if (!idPresent) {
            str += ":nth-of-type(" + (getChildIndex(el) + 1) + ")";
        }

        // Use '>' combinator if parent is not 'HTML'
        var parentSelector = generateQuerySelector(el.parentElement);
        return parentSelector === "HTML" ? str : parentSelector + " > " + str;
    }

    /** Remove every overlay box added by the last markPage() call. */
    function unmarkPage() {
        for (const label of labels) {
            // remove() is a no-op for a node that was already detached,
            // whereas document.body.removeChild() would throw.
            label.remove();
        }
        labels = [];
    }

    // Expose the unmarkPage function globally
    window.unmarkPage = unmarkPage;

    /**
     * Draw a numbered, dashed box over every innermost interactive element and
     * return a table mapping each number (as a string) to the element's
     * viewport-clipped rectangles.
     */
    function markPage() {
        unmarkPage();

        // The viewport size is the same for every element; compute it once.
        const vw = Math.max(document.documentElement.clientWidth || 0, window.innerWidth || 0);
        const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);

        let items = Array.from(document.querySelectorAll('*')).map(function (element) {
            // Keep only rectangles whose center actually hits the element
            // (or one of its descendants), then clip them to the viewport.
            const rects = [...element.getClientRects()].filter(bb => {
                const center_x = bb.left + bb.width / 2;
                const center_y = bb.top + bb.height / 2;
                const elAtCenter = document.elementFromPoint(center_x, center_y);

                return elAtCenter === element || element.contains(elAtCenter)
            }).map(bb => {
                const rect = {
                    left: Math.max(0, bb.left),
                    top: Math.max(0, bb.top),
                    right: Math.min(vw, bb.right),
                    bottom: Math.min(vh, bb.bottom)
                };
                return {
                    ...rect,
                    width: rect.right - rect.left,
                    height: rect.bottom - rect.top
                }
            });

            const area = rects.reduce((acc, rect) => acc + rect.width * rect.height, 0);

            return {
                element: element,
                include:
                    (element.tagName === "INPUT" || element.tagName === "TEXTAREA" || element.tagName === "SELECT") ||
                    (element.tagName === "BUTTON" || element.tagName === "A" || (element.onclick != null) || window.getComputedStyle(element).cursor == "pointer") ||
                    (element.tagName === "IFRAME" || element.tagName === "VIDEO")
                ,
                area,
                rects,
                text: element.textContent.trim().replace(/\s{2,}/g, ' ')
            };
        }).filter(item =>
            item.include && (item.area >= 20)
        );

        // Only keep inner clickable items
        items = items.filter(x => !items.some(y => x !== y && x.element.contains(y.element)));

        // Function to generate random colors
        function getRandomColor() {
            const letters = '0123456789ABCDEF';
            let color = '#';
            for (let i = 0; i < 6; i++) {
                color += letters[Math.floor(Math.random() * 16)];
            }
            return color;
        }

        // Built per call so the result never carries entries from an earlier run.
        const selector_id_table = {};

        // Create a floating border on top of these elements that will always be visible
        items.forEach(function (item, index) {
            selector_id_table[index.toString()] = item.rects;

            item.rects.forEach((bbox) => {
                // Declared locally; assigning an undeclared name would create
                // a global on the page (and throw in strict mode).
                const newElement = document.createElement("div");
                const borderColor = getRandomColor();
                newElement.style.outline = `2px dashed ${borderColor}`;
                newElement.style.position = "fixed";
                newElement.style.left = bbox.left + "px";
                newElement.style.top = bbox.top + "px";
                newElement.style.width = bbox.width + "px";
                newElement.style.height = bbox.height + "px";
                newElement.style.pointerEvents = "none";
                newElement.style.boxSizing = "border-box";
                newElement.style.zIndex = 2147483647;
                // newElement.style.background = `${borderColor}80`;

                // Add floating label at the corner
                const label = document.createElement("span");
                label.textContent = index;
                label.style.position = "absolute";
                label.style.top = "-19px";
                label.style.left = "0px";
                label.style.background = borderColor;
                label.style.color = "white";
                label.style.padding = "2px 4px";
                label.style.fontSize = "12px";
                label.style.borderRadius = "2px";
                newElement.appendChild(label);

                document.body.appendChild(newElement);
                labels.push(newElement);
                // item.element.setAttribute("-ai-label", label.textContent);
            });
        })

        return selector_id_table;
    }

    return markPage();
})()

View File

@@ -0,0 +1,39 @@
import csv
import requests
# Latest run on 2024.1.4
def download_csv(url, file_path, timeout=60):
    """
    Download the document at `url` and write its text to `file_path`.

    Args:
        url (str): address of the CSV file to download
        file_path (str): where to store the downloaded text (UTF-8)
        timeout (float): seconds to wait for the server before giving up

    Raises:
        requests.RequestException: if the request fails, times out, or the
          server answers with an HTTP error status
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than saving an HTTP error page as if it were the CSV.
    response.raise_for_status()

    with open(file_path, 'w', newline='', encoding='utf-8') as file:
        file.write(response.text)
def read_csv(file_path):
    """
    Read the values of the third column of a CSV file.

    The first row is treated as a header and skipped. Rows with fewer than
    three columns (for example blank lines) are ignored.

    Args:
        file_path (str): path of the UTF-8 encoded CSV file

    Returns:
        list: third-column value of every usable row, in file order
    """
    with open(file_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # Skip the header
        # Assuming the URL is in the third column; short rows have none.
        return [row[2] for row in reader if len(row) > 2]
def main():
    """Download the Majestic Million CSV, then print its first ten URLs."""
    csv_path = 'majestic_million.csv'
    source_url = 'http://downloads.majestic.com/majestic_million.csv'

    print("Downloading Majestic Million CSV...")
    download_csv(source_url, csv_path)

    print("Reading URLs from CSV...")
    all_urls = read_csv(csv_path)

    # Show a small sample: the first 10 URLs
    sample = all_urls[:10]
    for entry in sample:
        print(entry)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,119 @@
import json
import os
import random
import time
import uuid
from multiprocessing import Pool
from playwright.sync_api import sync_playwright
from majestic_million_download import read_csv
# JavaScript code as a string
# Read with an explicit encoding so the script text does not depend on the
# platform's default locale encoding.
with open('get_tag_elem_dict.js', 'r', encoding='utf-8') as f:
    get_tag_elem_dict_js_code = f.read()
def scrape_data(website_url, action_depth=10):
    """
    Open `website_url` in headless Chromium and record screenshots plus element
    metadata, then follow up to `action_depth` randomly chosen links, recording
    the same data after each click that changed the page.

    Output goes to `collected_data/<url without scheme>/<uuid>/`:
    `screenshot_<i>.png` (page before marking), `meta_data_<i>.json` and
    `screenshot_som_<i>.png` (page after the marking script ran).

    Args:
        website_url (str): URL to start from, including the scheme
        action_depth (int): maximum number of random link clicks

    Returns:
        None. Nothing is done if data for this site already exists.
    """
    # Strip the scheme; a URL without "//" is used as is rather than raising
    # IndexError.
    url_parts = website_url.split("//")
    site_dir = os.path.join('collected_data', url_parts[1] if len(url_parts) > 1 else url_parts[0])

    # if file exists, skip
    if os.path.exists(site_dir):
        print("Data already exists, skipping...")
        return

    def click_random_link(page):
        """Click a random <a> element; return True only if the body HTML changed."""
        links = page.query_selector_all("a")
        if not links:
            print("No links found on the page.")
            return False

        random_link = random.choice(links)
        try:
            # Remove the overlay boxes first so they neither block the click
            # nor show up as a content change.
            page.evaluate("window.unmarkPage()")

            # Capture the initial HTML content of the body
            initial_content = page.inner_html("body")

            # Click the link and wait for potential navigation
            random_link.click()
            page.wait_for_timeout(5000)  # wait for 5 seconds to allow page changes to occur

            # Capture the new HTML content of the body
            new_content = page.inner_html("body")
        except Exception as e:
            print("Error occurred:", e)
            return False

        # Compare the contents
        if new_content != initial_content:
            print("Content change detected.")
            return True

        print("No content change detected.")
        return False

    def capture_step(page, data_dir, data_id, step):
        """Store the plain screenshot, the metadata and the marked screenshot of one step."""
        page.screenshot(path=os.path.join(data_dir, f'screenshot_{step}.png'))
        tag_elem_dict = page.evaluate(get_tag_elem_dict_js_code)
        with open(os.path.join(data_dir, f'meta_data_{step}.json'), 'w') as f:
            json.dump({
                'timestamp': time.time(),
                'url': website_url,
                # 'url' is always the starting URL; also record where the
                # browser actually is after redirects and clicks.
                'page_url': page.url,
                'data_id': data_id,
                'tag_elem_dict': tag_elem_dict
            }, f, indent=4)
        page.screenshot(path=os.path.join(data_dir, f'screenshot_som_{step}.png'))

    with sync_playwright() as p:
        # Launch the browser
        browser = p.chromium.launch()
        try:
            context = browser.new_context(viewport={'width': 1920, 'height': 1080}, locale='en-US')
            context.set_extra_http_headers({'Accept-Language': 'en-US'})
            page = context.new_page()

            # Navigate to the target website
            page.goto(website_url, timeout=60000, wait_until='networkidle')

            data_id = str(uuid.uuid4())
            data_dir = os.path.join(site_dir, data_id)
            os.makedirs(data_dir, exist_ok=True)

            capture_step(page, data_dir, data_id, 0)

            for i in range(action_depth):
                if not click_random_link(page):
                    print("Invalid click or no navigation, stopping random clicks.")
                    break
                capture_step(page, data_dir, data_id, i + 1)
        finally:
            # Close the browser even when navigation or capturing fails
            browser.close()
def run_one(url):
    """Scrape one site over HTTPS; report any failure instead of raising."""
    target = "https://" + url
    try:
        scrape_data(target, action_depth=5)
    except Exception as err:
        # One failing site must not stop the worker from taking the next URL.
        print("Error scraping data:", err)
        print("Start next one...")
def main():
    """Scrape the first 20000 sites of `majestic_million.csv` with a pool of worker processes."""
    # Number of processes
    # Adjust based on your system's capability, on my i9-13900k, 50 processes can be used
    num_processes = 50

    all_urls = read_csv("majestic_million.csv")
    urls = all_urls[:20000]

    with Pool(num_processes) as workers:
        workers.map(run_one, urls)
if __name__ == '__main__':
main()

View File

@@ -22,3 +22,4 @@ openpyxl
python-docx python-docx
python-pptx python-pptx
pypdf pypdf
PyGetWindow