Merge branch 'main' into zdy

This commit is contained in:
David Chang
2024-01-22 14:49:57 +08:00
49 changed files with 3067 additions and 1348 deletions

View File

@@ -5,10 +5,10 @@ import os
import subprocess
import tempfile
import time
from typing import Callable, Any, Optional
from typing import Callable, Any, Optional, Tuple
# import uuid
# import platform
from typing import List, Dict
from typing import List, Dict, Union
import gymnasium as gym
@@ -48,7 +48,8 @@ class DesktopEnv(gym.Env):
action_space: str = "computer_13",
task_config: Dict[str, Any] = None,
tmp_dir: str = "tmp",
cache_dir: str = "cache"
cache_dir: str = "cache",
screen_size: Tuple[int] = (1920, 1080)
):
"""
Args:
@@ -73,6 +74,7 @@ class DesktopEnv(gym.Env):
self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm)))
self.tmp_dir_base: str = tmp_dir
self.cache_dir_base: str = cache_dir
self.vm_screen_size = screen_size
# task-aware stuffs
# todo: handling the logic of snapshot directory
@@ -80,6 +82,7 @@ class DesktopEnv(gym.Env):
# Initialize emulator and controller
logger.info("Initializing...")
self._config_screen_size()
self._start_emulator()
self.vm_ip = self._get_vm_ip()
self.controller = PythonController(vm_ip=self.vm_ip)
@@ -87,7 +90,6 @@ class DesktopEnv(gym.Env):
# Meta info of the VM, move to the reset() function
self.vm_platform: str = "" # self.controller.get_vm_platform()
self.vm_screen_size = None # self.controller.get_vm_screen_size()
# mode: human or machine
assert action_space in ["computer_13", "pyautogui"]
@@ -101,6 +103,57 @@ class DesktopEnv(gym.Env):
self._step_no: int = 0
self.action_history: List[Dict[str, any]] = []
def _config_screen_size(self):
    """Rewrite the vmx file so the guest display matches ``self.vm_screen_size``.

    Strips any pre-existing svga/display entries from the vmx file and
    appends fresh ones, with a VRAM size computed for 32-bit color.

    Returns:
        bool: True on success, False if the vmx file is missing or the
        rewrite failed.
    """

    def vram_bytes(width, height, bits_per_pixel=32):
        """VRAM needed for a width x height framebuffer (default 32 bpp)."""
        return width * height * (bits_per_pixel // 8)

    if not os.path.isfile(self.path_to_vm):
        logger.warning(f"The specified vmx file does not exist: {self.path_to_vm}")
        return False

    width, height = self.vm_screen_size
    vramSize = vram_bytes(width, height)

    # Any line mentioning one of these keys is replaced below.
    managed_keys = ("svga.autodetect", "svga.vramSize", "displayWidth", "displayHeight")
    try:
        with open(self.path_to_vm, 'r') as file:
            kept_lines = [ln for ln in file if not any(key in ln for key in managed_keys)]

        # Append new settings for screen size and VRAM.
        kept_lines.append('svga.autodetect = "TRUE"\n')
        kept_lines.append(f'svga.vramSize = "{vramSize}"\n')
        kept_lines.append(f'displayWidth = "{width}"\n')
        kept_lines.append(f'displayHeight = "{height}"\n')

        with open(self.path_to_vm, 'w') as file:
            file.writelines(kept_lines)

        logger.info(f"Screen size for {self.path_to_vm} set to {width}x{height} with VRAM size {vramSize} bytes")
        return True
    except IOError as e:
        logger.error(f"An IOError occurred: {e}")
        return False
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return False
def _start_emulator(self):
while True:
try:
@@ -119,7 +172,7 @@ class DesktopEnv(gym.Env):
logger.error(f"Error executing command: {e.output.decode().strip()}")
def _get_vm_ip(self):
max_retries = 10
max_retries = 20
logger.info("Getting IP Address...")
for _ in range(max_retries):
try:
@@ -159,12 +212,44 @@ class DesktopEnv(gym.Env):
self.instruction = task_config["instruction"]
self.config = task_config["config"]
# evaluator dict
# func -> metric function string, or list of metric function strings
# conj -> conjunction of multiple metrics if func is a list with length > 1, "and"/"or"
# result -> result getter config, or list of result getter configs
# expected (optional) -> expected getter config, or list of expected getter configs
# options (optional) -> metric options, or list of metric options
# if func is a str list, then result, expected (if exists), options (if exists) should also be lists of the same length
# even if one of the metrics does not need expected or options field, it should be included in the list with None
self.evaluator = task_config["evaluator"]
self.metric: Metric = getattr(metrics, self.evaluator["func"])
self.result_getter: Getter = getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
self.expected_getter: Getter = getattr(getters, "get_{:}".format(
self.evaluator["expected"]["type"])) if "expected" in self.evaluator else None
self.metric_options: Dict[str, Any] = self.evaluator.get("options", {})
self.metric: Metric = [getattr(metrics, func) for func in self.evaluator["func"]] \
if isinstance(self.evaluator["func"], list) \
else getattr(metrics, self.evaluator["func"])
self.metric_conj: str = self.evaluator.get("conj", "and") # take conjunction of multiple metrics
self.result_getter: Getter = [getattr(getters, "get_{:}".format(res["type"])) for res in
self.evaluator["result"]] \
if isinstance(self.evaluator["result"], list) \
else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
if "expected" in self.evaluator:
self.expected_getter: Getter = [getattr(getters, "get_{:}".format(exp["type"])) if exp else None for exp in
self.evaluator["expected"]] \
if isinstance(self.evaluator["expected"], list) \
else getattr(getters, "get_{:}".format(self.evaluator["expected"]["type"]))
else:
self.expected_getter = [None] * len(self.metric) \
if isinstance(self.metric, list) \
else None
self.metric_options: Union[List[Dict[str, Any]], Dict[str, Any]] = [opt if opt else {} for opt in
self.evaluator["options"]] \
if isinstance(self.evaluator.get("options", {}), list) \
else self.evaluator["options"] \
if "options" in self.evaluator \
else [{}] * len(self.metric) \
if isinstance(self.metric, list) \
else {}
assert (not isinstance(self.evaluator["func"], list)
or (len(self.metric) == len(self.result_getter) == len(self.expected_getter) == len(
self.metric_options)))
def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None) -> Dict[str, Any]:
logger.info("Resetting environment...")
@@ -190,6 +275,8 @@ class DesktopEnv(gym.Env):
_execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
time.sleep(5)
self._config_screen_size()
print(self.vm_screen_size)
logger.info("Starting emulator...")
self._start_emulator()
logger.info("Emulator started.")
@@ -197,6 +284,7 @@ class DesktopEnv(gym.Env):
logger.info("Get meta info of the VM...")
self.vm_platform = self.controller.get_vm_platform()
self.vm_screen_size = self.controller.get_vm_screen_size()
print(self.vm_screen_size)
logger.info("Setting up environment...")
self.setup_controller.setup(self.config)
@@ -256,17 +344,41 @@ class DesktopEnv(gym.Env):
self.setup_controller.setup(self.evaluator.get("postconfig", []))
try:
result_state = self.result_getter(self, self.evaluator["result"])
except FileNotFoundError:
logger.error("File not found!")
return 0
if type(self.metric) == list:
for idx, metric in enumerate(self.metric):
try:
config = self.evaluator["result"][idx]
result_state = self.result_getter[idx](self, config)
except FileNotFoundError:
logger.error("File not found!")
if self.metric_conj == 'and':
return 0
expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \
else None
expected = self.evaluator["expected"][idx]
expected_state = self.expected_getter[idx](self, expected) if expected else None
metric: float = self.metric(result_state, expected_state, **self.metric_options) if expected_state is not None \
else self.metric(result_state, **self.metric_options)
metric: int = metric(result_state, expected_state,
**self.metric_options[idx]) if expected_state is not None \
else metric(result_state, **self.metric_options[idx])
if self.metric_conj == 'and' and not bool(metric):
return 0
elif self.metric_conj == 'or' and bool(metric):
return 1
return 1 if self.metric_conj == 'and' else 0
else:
try:
result_state = self.result_getter(self, self.evaluator["result"])
except FileNotFoundError:
logger.error("File not found!")
return 0
expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \
else None
metric: float = self.metric(result_state, expected_state,
**self.metric_options) if expected_state is not None \
else self.metric(result_state, **self.metric_options)
return metric

View File

@@ -14,4 +14,6 @@ from .gimp import increase_saturation, decrease_brightness, check_file_exists, c
from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3, check_json
from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed
from .impress import check_slide_numbers_color, compare_pptx_files, check_for_two_lines, check_for_audio, check_formula_shape, check_file_exists
from .impress import check_image_stretch_and_center, check_slide_numbers_color, compare_pptx_files, check_strikethrough, \
check_for_audio, check_formula_shape
from .impress import check_slide_orientation_Portrait, contains_mp4_video

View File

@@ -180,8 +180,8 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
with open(result) as f:
result: Dict[str, Any] = json.load(f)
expect_rules = rule.get("expect", {})
unexpect_rules = rule.get("unexpect", {})
expect_rules = rules.get("expect", {})
unexpect_rules = rules.get("unexpect", {})
metric = True
for r in expect_rules:

View File

@@ -1,24 +1,56 @@
from pptx import Presentation
import os
from pptx.util import Inches
def check_image_stretch_and_center(modified_ppt, original_ppt):
    """Check that the first slide's image was stretched to fill the slide
    and centered in the modified deck.

    Args:
        modified_ppt: path to the edited .pptx file.
        original_ppt: path to the reference .pptx file.

    Returns:
        bool: True if the matching image fills the slide and is centered
        (within a 0.1 inch tolerance); False otherwise, including when the
        original slide has no image or no matching image exists in the
        modified deck.
    """
    # fixme: this func is overfit to this example libreoffice_impress
    original_pres = Presentation(original_ppt)
    modified_pres = Presentation(modified_ppt)

    # Only the first slide of each presentation is compared.
    original_slide = original_pres.slides[0]
    modified_slide = modified_pres.slides[0]

    # shape_type 13 == PICTURE in python-pptx's MSO_SHAPE_TYPE enum.
    original_slide_images = [shape for shape in original_slide.shapes if shape.shape_type == 13]
    modified_slide_images = [shape for shape in modified_slide.shapes if shape.shape_type == 13]
    if not original_slide_images:
        # Nothing to compare against in the reference deck.
        return False
    the_image = original_slide_images[0]

    # Find the same image (matched by binary content) in the modified deck.
    # Previously this variable could be left unbound (NameError) when no
    # image matched; now that case returns False explicitly.
    the_modified_image = None
    for modified_image in modified_slide_images:
        if the_image.image.blob == modified_image.image.blob:
            the_modified_image = modified_image
            break
    if the_modified_image is None:
        return False

    tolerance = Inches(0.1)
    if (abs(the_modified_image.width - original_pres.slide_width) > tolerance or
            abs(the_modified_image.height - original_pres.slide_height) > tolerance or
            abs(the_modified_image.left - (original_pres.slide_width - the_modified_image.width) / 2) > tolerance or
            abs(the_modified_image.top - (original_pres.slide_height - the_modified_image.height) / 2) > tolerance):
        return False
    return True
def is_red_color(color):
    """Return True if *color* is pure red (RGB 255, 0, 0).

    Args:
        color: a font color object exposing an ``rgb`` attribute, or None
            when no explicit color is set.

    Returns:
        bool: True only when a color is present and equals pure red.
    """
    # Guard before touching .rgb: the old debug print dereferenced
    # color.rgb first and crashed when color was None.
    if color is None:
        return False
    return color.rgb == (255, 0, 0)
def get_master_placeholder_color(prs):
# get the color of the placeholder
masters = prs.slide_masters
for idx, master in enumerate(masters):
for placeholder in master.placeholders:
if placeholder.has_text_frame and placeholder.text == "<number>":
for placeholder in master.placeholders:
if placeholder.has_text_frame and placeholder.text == "<number>":
text_frame = placeholder.text_frame
if text_frame.paragraphs:
first_paragraph = text_frame.paragraphs[0]
return first_paragraph.font.color
return None
return first_paragraph.font.color
return None
def check_slide_numbers_color(pptx_file_path):
presentation = Presentation(pptx_file_path)
@@ -34,42 +66,65 @@ def check_slide_numbers_color(pptx_file_path):
print(font_color)
return 1 if font_color is not None and is_red_color(font_color) else 0
def compare_pptx_files(file1_path, file2_path):
def compare_pptx_files(file1_path, file2_path, **options):
# todo: not strictly match since not all information is compared because we cannot get the info through pptx
prs1 = Presentation(file1_path)
prs2 = Presentation(file2_path)
# compare the number of slides
if len(prs1.slides) != len(prs2.slides):
return 0
return False
# compare the content of each slide
for slide1, slide2 in zip(prs1.slides, prs2.slides):
# check if the shapes are the same
for shape1, shape2 in zip(slide1.shapes, slide2.shapes):
if shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
return False
if hasattr(shape1, "text") and hasattr(shape2, "text"):
if shape1.text != shape2.text:
return 0
return 1
return False
def has_two_lines_on_page(slide):
    """Return True if the slide contains at least two Line shapes.

    shape_type == 1 denotes a Line shape (comment translated from Chinese).
    """
    lines_seen = 0
    for shp in slide.shapes:
        if shp.shape_type == 1:  # shape_type 1 means a Line shape
            lines_seen += 1
            if lines_seen >= 2:
                return True
    return False
# check if the paragraphs are the same
for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
# check if the runs are the same
for run1, run2 in zip(para1.runs, para2.runs):
if run1.text != run2.text:
return False
def check_for_two_lines(prs):
    """Return 1 if any slide in the deck has at least two Line shapes, else 0."""
    deck = Presentation(prs)
    return 1 if any(has_two_lines_on_page(slide) for slide in deck.slides) else 0
# check if the font properties are the same
if run1.font.name != run2.font.name or run1.font.size != run2.font.size or run1.font.bold != run2.font.bold or run1.font.italic != run2.font.italic or run1.font.color.rgb != run2.font.color.rgb:
return False
return True
def check_strikethrough(pptx_path, rules):
    """Check that the targeted paragraphs carry a strike-through.

    Args:
        pptx_path: path to the .pptx file to inspect.
        rules: dict with "slide_index_s", "shape_index_s" and
            "paragraph_index_s" index lists; every (slide, shape,
            paragraph) combination they define is checked.

    Returns:
        bool: True if the first run of every targeted paragraph has a
        'strike' attribute on its XML run-properties element; False
        otherwise, including when a targeted paragraph has no runs.
    """
    # Load the presentation
    presentation = Presentation(pptx_path)

    slide_index_s = rules["slide_index_s"]
    shape_index_s = rules["shape_index_s"]
    paragraph_index_s = rules["paragraph_index_s"]

    for slide_index in slide_index_s:
        # Get the slide
        slide = presentation.slides[slide_index]
        for shape_index in shape_index_s:
            # Get the text box
            paragraphs = slide.shapes[shape_index].text_frame.paragraphs
            for paragraph_index in paragraph_index_s:
                paragraph = paragraphs[paragraph_index]
                if not paragraph.runs:
                    # Previously this raised IndexError; an empty paragraph
                    # cannot be struck through, so fail the check instead.
                    return False
                run = paragraph.runs[0]
                # python-pptx exposes no strike-through API, so inspect the
                # raw run-properties element attributes directly.
                if 'strike' not in run.font._element.attrib:
                    return False
    return True
def check_file_exists(directory, filename):
    """Return 1 when *directory*/*filename* is an existing regular file, else 0."""
    return int(os.path.isfile(os.path.join(directory, filename)))
def has_audio_on_page(slide):
for shape in slide.shapes:
@@ -77,6 +132,7 @@ def has_audio_on_page(slide):
return True
return False
def check_for_audio(prs):
prs = Presentation(prs)
for i, slide in enumerate(prs.slides):
@@ -84,16 +140,39 @@ def check_for_audio(prs):
return 1
return 0
def check_formula_shape(prs):
    """Return 1 if slide 14 (index 13) contains an autoshape with a text frame, else 0.

    NOTE(review): the slide index is hard-coded to 13, so this check is
    tied to one specific deck — confirm before reusing elsewhere.
    """
    deck = Presentation(prs)
    target_slide = deck.slides[13]
    for shp in target_slide.shapes:
        if shp.has_text_frame and shp.shape_type == 1:
            return 1
    return 0
def check_slide_orientation_Portrait(pptx_path):
    """Return 1 if the deck is portrait-oriented (height exceeds width), else 0."""
    deck = Presentation(pptx_path)
    return 1 if deck.slide_width < deck.slide_height else 0
def contains_mp4_video(pptx_path):
    """Return 1 if any slide holds a movie media shape, else 0.

    Checks shape_type == 16 (media) and media_type == 3 — presumably the
    python-pptx MOVIE member; confirm against the pptx enum docs.
    """
    deck = Presentation(pptx_path)
    for slide in deck.slides:
        for shp in slide.shapes:
            # media_type is only consulted on media shapes, matching the
            # original nested-if behavior via short-circuit evaluation.
            if shp.shape_type == 16 and shp.media_type == 3:
                return 1
    return 0
if __name__ == "__main__":
path1 = "../../任务数据/LibreOffice Impress/Change_Color_Slide_Number_gold_textbox.pptx"
presentation = Presentation(path1)

View File

@@ -1,5 +1,44 @@
from typing import Dict
import json
def check_json_keybindings(actual: str, expected: str, **options) -> float:
    """Score a VS Code keybindings.json against one expected entry.

    Args:
        actual (str): path to result text file
        expected (str): expected dict{} — one keybinding entry that must
            appear verbatim in the file's JSON array
    Return:
        float: the score
    """
    with open(actual) as f:
        bindings = json.load(f)
    # keybindings.json parses to a list; score 1.0 when the expected
    # entry is present in it.
    return 1.0 if expected in bindings else 0.0
def check_json_settings(actual: str, expected: str, **options) -> float:
    """Score a VS Code settings.json against expected key/value pairs.

    Args:
        actual (str): path to result text file
        expected (str): expected dict{} — settings that must all be
            present with equal values (values may be lists or dicts)
    Return:
        float: the score (1.0 if every expected entry matches, else 0.0)
    """
    with open(actual) as f:
        data = json.load(f)
    # Compare entry-by-entry instead of via set(dict.items()): building a
    # set raises TypeError when a value is unhashable (e.g. the list in
    # {"editor.rulers": [50]} or a nested dict), and the old code also
    # shadowed the imported json module with a local variable.
    matched = all(key in data and data[key] == value
                  for key, value in expected.items())
    return 1.0 if matched else 0.0
def compare_text_file(actual: str, expected: str, **options) -> float:
"""

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1WT1-L0iiIlF2kuIK77IDxTfBaQ0X0BbX&export=download&authuser=0&confirm=t&uuid=0b69767e-1f3e-49ce-88a7-1036ef25bcaf&at=APZUnTXZ_sqEZUrHNx1edWep017b:1705337750065",
"url": "https://drive.usercontent.google.com/download?id=1WT1-L0iiIlF2kuIK77IDxTfBaQ0X0BbX&export=download&authuser=0&confirm=t&uuid=3daac1dc-0f6e-449b-b6bc-09fd246697aa&at=APZUnTVgf_yEeeaARnUISIE4wr4E:1705768410739",
"path": "Desktop/Ch5.pptx"
}
]

View File

@@ -1,12 +1,42 @@
{
"id": "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
"snapshot": "libreoffice_impress",
"instruction": "Could you help me change the background color to blue 2 and apply it to all my slides.",
"instruction": "Please make the background blue on all my slides.",
"source": "https://www.libreofficehelp.com/change-slide-background-impress/#All_Slides",
"config": [],
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1aHMJzk2G8B_EqDlTAZLEiJ4h-ZsgA9UE&export=download&authuser=0&confirm=t&uuid=196a082d-5f08-4b3e-a64f-c021351f9cd8&at=APZUnTUXH4gvLvElvm9TtFhUJlIn:1705481007789",
"path": "Desktop/lec17-gui-events.pptx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "Desktop/lec17-gui-events.pptx"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
""
"libreoffice_impress"
],
"evaluator": "evaluation_dir"
}
"evaluator": {
"func": "compare_pptx_files",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1LU-wnmIqMQgwkdAUFBLE1wNkH4gSl3IR&export=download&authuser=0&confirm=t&uuid=74520405-4028-4fbe-bab8-d56dc82ffb6c&at=APZUnTU0dz5ZE5CcQry8IeY5_s1J:1705481009686",
"dest": "lec17-gui-events_Gold.docx"
},
"result": {
"type": "vm_file",
"path": "Desktop/lec17-gui-events.pptx",
"dest": "lec17-gui-events.pptx"
}
}
}

View File

@@ -27,8 +27,16 @@
"libreoffice_impress"
],
"evaluator": {
"func": "check_file_exists",
"file_name": "res.png",
"directory": "/home/user/Desktop/"
"func": "compare_images",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1XTDaQ2NlovrusKkuY6udi_BQfLwSP9th&export=download&authuser=0&confirm=t&uuid=d3c7883e-3cea-4bf3-8f83-d878622ee76d&at=APZUnTXQEnT0Gi4rB0oegvVGheyn:1705859805154",
"dest": "res_gold.png"
},
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/res.png",
"dest": "res.png"
}
}
}

View File

@@ -2,7 +2,7 @@
"id": "550ce7e7-747b-495f-b122-acdc4d0b8e54",
"snapshot": "libreoffice_impress",
"instruction": "I am checking our soccer club's to-do list for the last semester and adding strike-through sign on the line we have already accomplished. Could you help me add a strike-through on the first and second line?",
"source": "https://superuser.com/questions/1211035/libreoffice-impress-animations-how-to-strikethrough-on-click?rq=1",
"source": "https://technical-tips.com/blog/software/text-in-libreoffice-strikethrough--6948#:~:text=To%20strikethrough%20Text%20in%20LibreOffice%201%20In%20your,effect%22%20can%20your%20additionally%2C%20for%20example%2C%20double%20underline.",
"config": [
{
"type": "download",
@@ -27,7 +27,15 @@
"libreoffice_impress"
],
"evaluator": {
"func": "check_for_two_lines",
"func": "check_strikethrough",
"expected": {
"type": "rule",
"rules": {
"slide_index_s": [4],
"shape_index_s": [1],
"paragraph_index_s": [1, 2]
}
},
"result": {
"type": "vm_file",
"path": "Desktop/New_Club_Spring_2018_Training.pptx",

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=16K6TpGIRZpqOJUu-mtJQ_78kIwLcn-4D&export=download&authuser=0&confirm=t&uuid=945b6f33-53d2-4e87-ada9-efa8b938a499&at=APZUnTVw4fKyJPW0vAAJURruAJIP:1705250184439",
"url": "https://drive.usercontent.google.com/download?id=16K6TpGIRZpqOJUu-mtJQ_78kIwLcn-4D&export=download&authuser=0&confirm=t&uuid=41509e5c-eb95-453a-baad-4e12a839a120&at=APZUnTVygE_LL27vx1l6OEg_FRj0:1705849959413",
"path": "Desktop/CPD_Background_Investigation_Process.pptx"
}
]
@@ -27,11 +27,11 @@
"libreoffice_impress"
],
"evaluator": {
"func": "compare_pptx_files",
"func": "check_image_stretch_and_center",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1rsvFPyHYiIPh1c8Nj8say0NJCG2VIDr7&export=download&authuser=0&confirm=t&uuid=aac08a92-6595-47d8-84dc-8f1ab1df987f&at=APZUnTXIWCn5B0CpLttvG2bsr_a7:1705250423565",
"dest": "CPD_Background_Investigation_Process_Gold.docx"
"path": "https://drive.usercontent.google.com/download?id=16K6TpGIRZpqOJUu-mtJQ_78kIwLcn-4D&export=download&authuser=0&confirm=t&uuid=41509e5c-eb95-453a-baad-4e12a839a120&at=APZUnTVygE_LL27vx1l6OEg_FRj0:1705849959413",
"dest": "CPD_Background_Investigation_Process_Original.pptx"
},
"result": {
"type": "vm_file",

View File

@@ -1,12 +1,34 @@
{
"id": "a097acff-6266-4291-9fbd-137af7ecd439",
"snapshot": "libreoffice_impress",
"instruction": "Could you help me save my slides to SAVE_PATH?",
"instruction": "Could you help me save my slides as pre.pptx on the Desktop?",
"source": "https://www.youtube.com/watch?v=DDmEvjs4iBw",
"config": [],
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1i_-m6mVrdesGJ392bulH5lveHarMwKk_&export=download&authuser=0&confirm=t&uuid=057973d3-52b7-45ac-8151-b2c6a1820f49&at=APZUnTU5SYajgO-YrxdDWSiJRfD4:1705768888387",
"path": "Desktop/Secrets-of-Monetizing-Video.pptx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "Desktop/Secrets-of-Monetizing-Video.pptx"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
""
"libreoffice_impress"
],
"evaluator": "evaluation_dir"
}
"evaluator": {
"func": "check_file_exists",
"file_name": "pre.pptx",
"directory": "/home/user/Desktop/"
}
}

View File

@@ -6,7 +6,7 @@
"config": [],
"trajectory": "trajectories/",
"related_apps": [
""
"libreoffice_impress"
],
"evaluator": "evaluation_dir"
}

View File

@@ -1,8 +1,8 @@
{
"id": "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
"snapshot": "libreoffice_impress",
"instruction": "I am making PPT on LibreOffice Impress for presentation tomorrow. I need to summarize contents on one slide. Could you make a summary slide for me?",
"source": "https://superuser.com/questions/1059080/how-to-make-a-summary-slide-in-impress-listing-the-titles-of-all-slides-autom",
"instruction": "I am making PPT on LibreOffice Impress for presentation tomorrow. I need to summarize contents on one slide use Impress \"Summary Slide\" feature. Could you make that for me?",
"source": "https://www.libreofficehelp.com/export-libreoffice-impress-slides-images/#:~:text=Exporting%20a%20single%20slide%20as.jpg%2C.png%2C%20etc%20image%20is,on%20the%20checkbox%20Selection.%20Provide%20jpg%20quality%20options.",
"config": [
{
"type": "download",

View File

@@ -1,12 +1,37 @@
{
"id": "ce88f674-ab7a-43da-9201-468d38539e4a",
"snapshot": "libreoffice_impress",
"instruction": "Could you help me change my slides to portrait (from landscape)?",
"instruction": "Please set my slides upright instead of sideways.",
"source": "https://justclickhere.co.uk/resources/change-slides-in-impress-to-portrait/",
"config": [],
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1LErTnC_w_YPQVo84QK5sifww9xZ-Cq0X&export=download&authuser=0&confirm=t&uuid=81ff0aaf-9c2e-4342-b7ce-36e65dd2218e&at=APZUnTUmQKCTp2HUP0dOqYqD10G3:1705479016156",
"path": "Desktop/AM_Last_Page_Template.pptx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "Desktop/AM_Last_Page_Template.pptx"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
""
"libreoffice_impress"
],
"evaluator": "evaluation_dir"
}
"evaluator": {
"func": "check_slide_orientation_Portrait",
"result": {
"type": "vm_file",
"path": "Desktop/AM_Last_Page_Template.pptx",
"dest": "AM_Last_Page_Template.pptx"
}
}
}

View File

@@ -1,12 +1,48 @@
{
"id": "f0a334af-f91b-4c03-b578-aac9bec2b543",
"snapshot": "libreoffice_impress",
"instruction": "Help me insert the video at VIDEO_PATH in the current slide.",
"instruction": "Insert the video Movie_countdown_2.mov on the Desktop into my current slide, please.",
"source": "https://www.libreofficehelp.com/insert-video-impress-presentation/#Inserting_a_Video_in_Impress",
"config": [],
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1vvRkrxOK_sFPX9PLFniFqrdNEZ2pQnPP&export=download&authuser=0&confirm=t&uuid=71964a12-2d0a-4c71-9375-2f9ec15de1ad&at=APZUnTX_B-T2GeZPS7ZmchMQ6E7m:1705481285721",
"path": "Desktop/Movie_activities_TURKEY.pptx"
}
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1S4lACtBb40Ff0DEjB2bG2tzr2qWwQLGd&export=download&authuser=0&confirm=t&uuid=a28c123e-5371-4e17-82c2-ed7b1f05b728&at=APZUnTW_rlUPV6mM4RjS0R6dMSv4:1705469776913",
"path": "Desktop/Movie_countdown_2.mov"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "Desktop/Movie_activities_TURKEY.pptx"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
""
"libreoffice_impress"
],
"evaluator": "evaluation_dir"
}
"evaluator": {
"func": "contains_mp4_video",
"result": {
"type": "vm_file",
"path": "Desktop/Movie_activities_TURKEY.pptx",
"dest": "Movie_activities_TURKEY.pptx"
}
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "59f21cfb-0120-4326-b255-a5b827b38967",
"snapshot": "base_setup",
"instruction": "Could you play the music video that's saved on my desktop for me?",
"instruction": "Could you play the music video that's saved on my desktop for me via vlc?",
"source": "https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/media.html#playing-a-file",
"config": [
{

View File

@@ -0,0 +1,40 @@
{
"id": "276cc624-87ea-4f08-ab93-f770e3790175",
"snapshot": "vscode",
"instruction": "Could you help me set the line length to 50 characters for current user in VS Code?",
"source": "https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"vscode"
],
"evaluator": {
"func": "check_json_settings",
"expected": {
"type": "rule",
"rules": {
"expect": {"editor.rulers": [50]}
}
},
"result": {
"type": "vm_file",
"path": "/home/user/.config/Code/User/settings.json",
"dest": "settings.json"
}
}
}

View File

@@ -1,12 +0,0 @@
{
"id": "3486f395-ad68-459c-8c39-ea07de934dd4",
"snapshot": "vscode",
"instruction": "Find me the keyboard shortcut of toggling integrated terminal. ",
"source": "https://www.youtube.com/watch?v=VqCgcpAypFQ",
"config": [],
"trajectory": "trajectories/3486f395-ad68-459c-8c39-ea07de934dd4",
"related_apps": [
"vscode"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,46 @@
{
"id": "4e60007a-f5be-4bfc-9723-c39affa0a6d3",
"snapshot": "vscode",
"instruction": "Install autoDocstring extension.",
"source": "https://campbell-muscle-lab.github.io/howtos_Python/pages/documentation/best_practices/vscode_docstring_extension/vscode_docstring_extension.html#:~:text=Type%2C%20Ctrl%20%2B%20Shift%20%2B%20P,select%20the%20NumPy%20docstring%20format.",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"trajectory": "trajectories/4e60007a-f5be-4bfc-9723-c39affa0a6d3",
"related_apps": [
"vscode"
],
"evaluator": {
"func": "is_extension_installed",
"result": {
"type": "vm_command_line",
"command": [
"code",
"--list-extensions",
"|",
"grep",
"njpwerner.autodocstring"
]
},
"expected": {
"type": "rule",
"rules": {
"type": "contain",
"expected": "njpwerner.autodocstring"
}
}
}
}

View File

@@ -1,12 +0,0 @@
{
"id": "515630d2-9b30-430c-b06a-e86b0143f7fb",
"snapshot": "vscode",
"instruction": "Help me enable automatically run code in VS code",
"source": "https://www.quora.com/How-do-I-automatically-run-code-in-Visual-Studio-Code",
"config": [],
"trajectory": "trajectories/",
"related_apps": [
"vscode"
],
"evaluator": "evaluation_dir"
}

View File

@@ -37,7 +37,7 @@
"expected": {
"type": "rule",
"rules": {
"expect": "100"
"expect": "1"
}
},
"result": {

View File

@@ -1,12 +0,0 @@
{
"id": "6f7546b0-52f3-4938-9213-52f35454d314",
"snapshot": "vscode",
"instruction": "Help me ask chatGPT to generate html and css code for a scroll bar?",
"source": "https://www.tiktok.com/@akramovdev/video/7243349980897922306",
"config": [],
"trajectory": "trajectories/",
"related_apps": [
"vscode"
],
"evaluator": "evaluation_dir"
}

View File

@@ -1,12 +0,0 @@
{
"id": "90f6eeeb-f3c2-4c98-873c-e77d78a45578",
"snapshot": "vscode",
"instruction": "Help me sync extensions and settings across all profiles.",
"source": "https://stackoverflow.com/questions/75866801/how-do-i-sync-extensions-and-their-settings-between-vs-code-profiles",
"config": [],
"trajectory": "trajectories/",
"related_apps": [
"vscode"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,45 @@
{
"id": "930fdb3b-11a8-46fe-9bac-577332e2640e",
"snapshot": "vscode",
"instruction": "I want to create a shortcut to shift my focus cursor from terminal to Editor in VS Code. Please help me create this shortcut to be 'ctrl+j'.",
"source": "https://superuser.com/questions/1270103/how-to-switch-the-cursor-between-terminal-and-code-in-vscode",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"vscode"
],
"evaluator": {
"func": "check_json_keybindings",
"expected": {
"type": "rule",
"rules": {
"expect":
{
"key": "ctrl+j",
"command": "workbench.action.focusActiveEditorGroup",
"when": "terminalFocus"
}
}
},
"result": {
"type": "vm_file",
"path": "/home/user/.config/Code/User/keybindings.json",
"dest": "keybindings.json"
}
}
}

View File

@@ -0,0 +1,40 @@
{
"id": "9439a27b-18ae-42d8-9778-5f68f891805e",
"snapshot": "vscode",
"instruction": "I want to keep my cursor focus in debug console when debugging in VS Code, instead of focusing back to Editor. So please help me modify the setting of VS Code accordingly.",
"source": "https://stackoverflow.com/questions/75832474/how-to-keep-cursor-in-debug-console-when-debugging-in-visual-studio-code",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"vscode"
],
"evaluator": {
"func": "check_json_settings",
"expected": {
"type": "rule",
"rules": {
"expect": {"debug.focusEditorOnBreak": false}
}
},
"result": {
"type": "vm_file",
"path": "/home/user/.config/Code/User/settings.json",
"dest": "settings.json"
}
}
}

View File

@@ -0,0 +1,40 @@
{
"id": "9d425400-e9b2-4424-9a4b-d4c7abac4140",
"snapshot": "vscode",
"instruction": "I want to make tabs wrapped over multiple lines when exceeding available space, please help modify the setting of VS Code.",
"source": "https://superuser.com/questions/1466771/is-there-a-way-to-make-editor-tabs-stack-in-vs-code",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"vscode"
],
"evaluator": {
"func": "check_json_settings",
"expected": {
"type": "rule",
"rules": {
"expect": {"workbench.editor.wrapTabs": true}
}
},
"result": {
"type": "vm_file",
"path": "/home/user/.config/Code/User/settings.json",
"dest": "settings.json"
}
}
}

View File

@@ -0,0 +1,39 @@
{
"id": "ae506c68-352c-4094-9caa-ee9d42052317",
"snapshot": "vscode",
"instruction": "Could you store the full terminal history of my VS Code terminal into '/home/user/Desktop/history.txt'?",
"source": "",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"trajectory": "trajectories/ae506c68-352c-4094-9caa-ee9d42052317",
"related_apps": [
"vscode"
],
"evaluator": {
"func": "compare_text_file",
"expected": {
"type": "cloud_file",
"path": "",
"dest": "gold_history.txt"
},
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/history.txt",
"dest": "history.txt"
}
}
}

View File

@@ -0,0 +1,42 @@
{
"id": "e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
"snapshot": "vscode",
"instruction": "I want to disable the missing imports reporting of python error, please modify the setting of VS Code for me.",
"source": "https://superuser.com/questions/1386061/how-to-suppress-some-python-errors-warnings-in-vs-code",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"vscode"
],
"evaluator": {
"func": "check_json_settings",
"expected": {
"type": "rule",
"rules": {
"expect": {
"python.analysis.diagnosticSeverityOverrides": {"reportMissingImports": "none"}
}
}
},
"result": {
"type": "vm_file",
"path": "/home/user/.config/Code/User/settings.json",
"dest": "settings.json"
}
}
}

View File

@@ -0,0 +1,47 @@
{
"id": "ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
"snapshot": "vscode",
"instruction": "I want to remove the shortcut 'cmd+f' for Tree view Find (Explorer search) in VS Code explorer view due to shortcut conflict. Can you help me remove this shortcut?",
"source": ["https://superuser.com/questions/1748097/vs-code-disable-tree-view-find-explorer-search",
"https://superuser.com/questions/1417361/how-to-disable-file-filtering-in-vs-code-sidebar-explorer?rq=1"
],
"config": [
{
"type": "launch",
"parameters": {
"command": [
"code"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"vscode"
],
"evaluator": {
"func": "check_json_keybindings",
"expected": {
"type": "rule",
"rules": {
"expect":
{
"key": "cmd+f",
"command": "-list.find",
"when": "listFocus && listSupportsFind"
}
}
},
"result": {
"type": "vm_file",
"path": "/home/user/.config/Code/User/keybindings.json",
"dest": "keybindings.json"
}
}
}

141
experiment_a11y_tree.py Normal file
View File

@@ -0,0 +1,141 @@
import datetime
import json
import logging
import os
import sys
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4v_agent import GPT4v_Agent
# Logger Configs {{{ #
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(
fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #
logger = logging.getLogger("desktopenv.experiment")
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
    """Run one benchmark task with *agent*, logging every action and artifact.

    The environment is built from the module-level PATH_TO_VM.  Before each
    prediction the current accessibility tree is dumped to
    ``accessibility_tree.xml``; after each action the screenshot is copied into
    *example_trajectory_dir* and one JSON line is appended to ``trajectory.json``.
    """
    traj_path = os.path.join(example_trajectory_dir, "trajectory.json")
    env = DesktopEnv(path_to_vm=PATH_TO_VM, action_space=agent.action_space, task_config=example)
    # Reset the environment to the task's snapshot and grab the first observation.
    obs = env.reset()
    done = False
    steps_taken = 0
    if recording:
        # Ask the in-VM server to start a screen recording.
        env.controller.start_recording()
    while not done and steps_taken < max_steps:
        # Dump the current accessibility tree for offline inspection.
        with open("accessibility_tree.xml", "w", encoding="utf-8") as tree_file:
            tree_file.write(obs["accessibility_tree"])
        predicted = agent.predict(obs)
        steps_taken += 1
        for act in predicted:
            # Timestamp is captured before the action executes.
            ts = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", steps_taken, act)
            obs, reward, done, info = env.step(act)
            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)
            logger.info("Info: %s", info)
            # Copy the screenshot file (obs['screenshot'] is a path) next to the trajectory.
            shot_name = f"step_{steps_taken}_{ts}.png"
            with open(os.path.join(example_trajectory_dir, shot_name), "wb") as dst:
                with open(obs['screenshot'], "rb") as src:
                    dst.write(src.read())
            # One JSON object per line (JSONL).
            with open(traj_path, "a") as traj_file:
                traj_file.write(json.dumps({
                    "step_num": steps_taken,
                    "action_timestamp": ts,
                    "action": act,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": shot_name
                }))
                traj_file.write("\n")
            if done:
                logger.info("The episode is done.")
                break
    if recording:
        # Ask the in-VM server to stop recording and pull the video.
        env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    # env.close()
    logger.info("Environment closed.")
# Entry point: run a single Chrome example with GPT-4V in accessibility-tree mode.
if __name__ == "__main__":
    action_space = "pyautogui"  # agent emits pyautogui code strings
    example_class = "chrome"
    example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
    gpt4_model = "gpt-4-vision-preview"
    gemini_model = "gemini-pro-vision"
    logger.info("Running example %s/%s", example_class, example_id)
    logger.info("Using model %s", gpt4_model)
    # logger.info("Using model %s", gemini_model)
    # Load the task definition shipped with the benchmark.
    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
        example = json.load(f)
    example["snapshot"] = "exp_setup4"  # override the snapshot used for this experiment run
    api_key = os.environ.get("OPENAI_API_KEY")
    agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
                        action_space=action_space, exp="a11y_tree")
    # api_key = os.environ.get("GENAI_API_KEY")
    # agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space, exp="a11y_tree")
    root_trajectory_dir = "exp_trajectory"
    # Trajectories are grouped as <mode>/<app class>/<model>/<example id>.
    example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gpt4_model, example_id)
    # example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gemini_model, example_id)
    os.makedirs(example_trajectory_dir, exist_ok=True)
    run_one_example(example, agent, 15, example_trajectory_dir)

View File

@@ -113,20 +113,28 @@ if __name__ == "__main__":
action_space = "pyautogui"
example_class = "thunderbird"
example_id = "bb5e4c0d-f964-439c-97b6-bdb9747de3f4"
gpt4_model = "gpt-4-vision-preview"
gemini_model = "gemini-pro-vision"
logger.info("Running example %s/%s", example_class, example_id)
logger.info("Using model %s", gpt4_model)
# logger.info("Using model %s", gemini_model)
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
example = json.load(f)
example["snapshot"] = "exp_setup2"
# api_key = os.environ.get("OPENAI_API_KEY")
# agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space)
# agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
api_key = os.environ.get("GENAI_API_KEY")
agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space)
agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
root_trajectory_dir = "exp_trajectory"
example_trajectory_dir = os.path.join(root_trajectory_dir, example_class, example_id)
example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gpt4_model, example_id)
# example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gemini_model, example_id)
os.makedirs(example_trajectory_dir, exist_ok=True)
run_one_example(example, agent, 10, example_trajectory_dir)
run_one_example(example, agent, 15, example_trajectory_dir)

View File

@@ -0,0 +1,139 @@
import datetime
import json
import logging
import os
import sys
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4v_agent import GPT4v_Agent
# Logger Configs {{{ #
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(
fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #
logger = logging.getLogger("desktopenv.experiment")
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
    """Run a single benchmark example with *agent* inside a fresh DesktopEnv.

    Every executed action is logged; its screenshot and metadata are appended
    under *example_trajectory_dir* (``trajectory.json`` holds one JSON line per
    action).  When *recording* is true a screen recording is captured on the VM
    and saved as ``recording.mp4``.
    """
    trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
    env = DesktopEnv(
        path_to_vm=PATH_TO_VM,
        action_space=agent.action_space,
        task_config=example
    )
    # reset the environment to certain snapshot
    observation = env.reset()
    done = False
    step_num = 0
    if recording:
        # send a request to the server to start recording
        env.controller.start_recording()
    while not done and step_num < max_steps:
        actions = agent.predict(observation)
        step_num += 1
        for action in actions:
            # Capture the timestamp before executing the action
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_num, action)
            observation, reward, done, info = env.step(action)
            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)
            logger.info("Info: %s", info)
            # Save screenshot and trajectory information.
            # observation['screenshot'] is a file path; copy its bytes next to the trajectory.
            with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f:
                with open(observation['screenshot'], "rb") as __f:
                    screenshot = __f.read()
                _f.write(screenshot)
            # Append one JSON object per executed action (JSONL format).
            with open(trajectory_recording_path, "a") as f:
                f.write(json.dumps({
                    "step_num": step_num,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_num}_{action_timestamp}.png"
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
    if recording:
        # send a request to the server to stop recording
        env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    # env.close()
    logger.info("Environment closed.")
# Entry point: run a single Chrome example with GPT-4V using the combined
# (screenshot + accessibility tree) observation mode.
if __name__ == "__main__":
    action_space = "pyautogui"  # agent emits pyautogui code strings
    example_class = "chrome"
    example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
    gpt4_model = "gpt-4-vision-preview"
    gemini_model = "gemini-pro-vision"
    logger.info("Running example %s/%s", example_class, example_id)
    logger.info("Using model %s", gpt4_model)
    # logger.info("Using model %s", gemini_model)
    # Load the task definition shipped with the benchmark.
    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
        example = json.load(f)
    example["snapshot"] = "exp_setup4"  # override the snapshot used for this experiment run
    api_key = os.environ.get("OPENAI_API_KEY")
    agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
                        action_space=action_space, exp="both")
    # api_key = os.environ.get("GENAI_API_KEY")
    # agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space, exp="both")
    root_trajectory_dir = "exp_trajectory"
    # Trajectories are grouped as <mode>/<app class>/<model>/<example id>.
    example_trajectory_dir = os.path.join(root_trajectory_dir, "both", example_class, gpt4_model, example_id)
    # example_trajectory_dir = os.path.join(root_trajectory_dir, "both", example_class, gemini_model, example_id)
    os.makedirs(example_trajectory_dir, exist_ok=True)
    run_one_example(example, agent, 15, example_trajectory_dir)

View File

@@ -5,8 +5,7 @@ import os
import sys
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4_agent import GPT4_Agent
from mm_agents.gemini_pro_agent import GeminiPro_Agent
from mm_agents.gpt_4v_agent import GPT4v_Agent
# Logger Configs {{{ #
logger = logging.getLogger()
@@ -111,8 +110,8 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
if __name__ == "__main__":
action_space = "pyautogui"
example_class = "chrome"
example_id = "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263"
gpt4_model = "gpt-4-1106-preview"
example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
gpt4_model = "gpt-4-vision-preview"
gemini_model = "gemini-pro-vision"
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
@@ -120,15 +119,16 @@ if __name__ == "__main__":
example["snapshot"] = "exp_setup4"
api_key = os.environ.get("OPENAI_API_KEY")
agent = GPT4_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], action_space=action_space)
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
action_space=action_space, exp="seeact")
# api_key = os.environ.get("GENAI_API_KEY")
# agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space)
root_trajectory_dir = "exp_trajectory"
example_trajectory_dir = os.path.join(root_trajectory_dir, "text", example_class, gpt4_model, example_id)
# example_trajectory_dir = os.path.join(root_trajectory_dir, "text", example_class, gemini_model, example_id)
example_trajectory_dir = os.path.join(root_trajectory_dir, "seeact", example_class, gpt4_model, example_id)
# example_trajectory_dir = os.path.join(root_trajectory_dir, "seeact", example_class, gemini_model, example_id)
os.makedirs(example_trajectory_dir, exist_ok=True)

View File

@@ -0,0 +1,135 @@
import datetime
import json
import logging
import os
import sys
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4v_agent import GPT4v_Agent
# Logger Configs {{{ #
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(
fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #
logger = logging.getLogger("desktopenv.experiment")
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
    """Run a single benchmark example with *agent* inside a fresh DesktopEnv.

    Every executed action is logged; its screenshot and metadata are appended
    under *example_trajectory_dir* (``trajectory.json`` holds one JSON line per
    action).  When *recording* is true a screen recording is captured on the VM
    and saved as ``recording.mp4``.
    """
    trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
    env = DesktopEnv(
        path_to_vm=PATH_TO_VM,
        action_space=agent.action_space,
        task_config=example
    )
    # reset the environment to certain snapshot
    observation = env.reset()
    done = False
    step_num = 0
    if recording:
        # send a request to the server to start recording
        env.controller.start_recording()
    while not done and step_num < max_steps:
        actions = agent.predict(observation)
        step_num += 1
        for action in actions:
            # Capture the timestamp before executing the action
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_num, action)
            observation, reward, done, info = env.step(action)
            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)
            logger.info("Info: %s", info)
            # Save screenshot and trajectory information.
            # observation['screenshot'] is a file path; copy its bytes next to the trajectory.
            with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f:
                with open(observation['screenshot'], "rb") as __f:
                    screenshot = __f.read()
                _f.write(screenshot)
            # Append one JSON object per executed action (JSONL format).
            with open(trajectory_recording_path, "a") as f:
                f.write(json.dumps({
                    "step_num": step_num,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_num}_{action_timestamp}.png"
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
    if recording:
        # send a request to the server to stop recording
        env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    # env.close()
    logger.info("Environment closed.")
# Entry point: run a single Chrome example with GPT-4V using the
# Set-of-Mark (annotated screenshot) observation mode.
if __name__ == "__main__":
    action_space = "pyautogui"  # agent emits pyautogui code strings
    example_class = "chrome"
    example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
    gpt4_model = "gpt-4-vision-preview"
    gemini_model = "gemini-pro-vision"
    # Load the task definition shipped with the benchmark.
    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
        example = json.load(f)
    example["snapshot"] = "exp_setup4"  # override the snapshot used for this experiment run
    api_key = os.environ.get("OPENAI_API_KEY")
    agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
                        action_space=action_space, exp="som")
    # api_key = os.environ.get("GENAI_API_KEY")
    # agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space)
    root_trajectory_dir = "exp_trajectory"
    # Trajectories are grouped as <mode>/<app class>/<model>/<example id>.
    example_trajectory_dir = os.path.join(root_trajectory_dir, "som", example_class, gpt4_model, example_id)
    # example_trajectory_dir = os.path.join(root_trajectory_dir, "som", example_class, gemini_model, example_id)
    os.makedirs(example_trajectory_dir, exist_ok=True)
    run_one_example(example, agent, 15, example_trajectory_dir)

View File

@@ -1,283 +0,0 @@
# fixme: Need to be rewrite on new action space
import os
import re
import base64
import PIL.Image
import json
import requests
import torch
import argparse
# seem
from seem.modeling.BaseModel import BaseModel as BaseModel_Seem
from seem.utils.distributed import init_distributed as init_distributed_seem
from seem.modeling import build_model as build_model_seem
from task_adapter.seem.tasks import inference_seem_pano
# semantic sam
from semantic_sam.BaseModel import BaseModel
from semantic_sam import build_model
from semantic_sam.utils.dist import init_distributed_mode
from semantic_sam.utils.arguments import load_opt_from_config_file
from semantic_sam.utils.constants import COCO_PANOPTIC_CLASSES
from task_adapter.semantic_sam.tasks import inference_semsam_m2m_auto, prompt_switch
# sam
from segment_anything import sam_model_registry
from task_adapter.sam.tasks.inference_sam_m2m_auto import inference_sam_m2m_auto
from scipy.ndimage import label
from io import BytesIO
import numpy as np
SYS_PROMPT = '''
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image.
Firstly you need to predict the class of your action, select from one below:
- **CLICK**: click on the screen with the specified integer label
- **TYPE**: type a string on the keyboard
- For CLICK, you need to predict the correct integer label shown on the screenshot
for example, format as:
```
{
"action_type": "CLICK",
"label": 7
}
```
- For TYPE, you need to specify the text you want to type
for example, format as:
```
{
"action_type": "TYPE",
"text": "hello world"
}
```
For every step, you should only return the action_type and the parameters of your action as a dict, without any other things. You MUST wrap the dict with backticks (\`).
You can predict multiple actions at one step, but you should only return one action for each step.
You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty.
'''
# build args
semsam_cfg = "configs/semantic_sam_only_sa-1b_swinL.yaml"
seem_cfg = "configs/seem_focall_unicl_lang_v1.yaml"
semsam_ckpt = "./swinl_only_sam_many2many.pth"
sam_ckpt = "./sam_vit_h_4b8939.pth"
seem_ckpt = "./seem_focall_v1.pt"
opt_semsam = load_opt_from_config_file(semsam_cfg)
opt_seem = load_opt_from_config_file(seem_cfg)
opt_seem = init_distributed_seem(opt_seem)
# build model
model_semsam = BaseModel(opt_semsam, build_model(opt_semsam)).from_pretrained(semsam_ckpt).eval().cuda()
model_sam = sam_model_registry["vit_h"](checkpoint=sam_ckpt).eval().cuda()
model_seem = BaseModel_Seem(opt_seem, build_model_seem(opt_seem)).from_pretrained(seem_ckpt).eval().cuda()
with torch.no_grad():
with torch.autocast(device_type='cuda', dtype=torch.float16):
model_seem.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(COCO_PANOPTIC_CLASSES + ["background"], is_eval=True)
@torch.no_grad()
def inference(image, slider, mode, alpha, label_mode, anno_mode, *args, **kwargs):
    """Segment *image* with one of three models and return (output, mask).

    *slider* selects the backbone: < 1.5 -> SEEM, > 2.5 -> SAM, otherwise
    Semantic-SAM, whose granularity level is derived from the fractional
    slider position below.  *mode* is accepted but never referenced in the
    body.  *args*/*kwargs* are forwarded only to the Semantic-SAM path.
    """
    if slider < 1.5:
        model_name = 'seem'
    elif slider > 2.5:
        model_name = 'sam'
    else:
        model_name = 'semantic-sam'
    # Map the slider position onto a Semantic-SAM granularity level
    # (only consumed by the 'semantic-sam' branch below).
    if slider < 1.5 + 0.14:
        level = [1]
    elif slider < 1.5 + 0.28:
        level = [2]
    elif slider < 1.5 + 0.42:
        level = [3]
    elif slider < 1.5 + 0.56:
        level = [4]
    elif slider < 1.5 + 0.70:
        level = [5]
    elif slider < 1.5 + 0.84:
        level = [6]
    else:
        level = [6, 1, 2, 3, 4, 5]
    # 'Alphabet' -> letter marks ('a'); anything else -> numeric marks ('1').
    if label_mode == 'Alphabet':
        label_mode = 'a'
    else:
        label_mode = '1'
    # Fixed annotation/post-processing parameters used by all three paths.
    text_size, hole_scale, island_scale = 1280, 100, 100
    text, text_part, text_thresh = '', '', '0.0'
    # Models are module-level globals living on CUDA; run in fp16 autocast.
    with torch.autocast(device_type='cuda', dtype=torch.float16):
        semantic = False
        if model_name == 'semantic-sam':
            model = model_semsam
            output, mask = inference_semsam_m2m_auto(model, image, level, text, text_part, text_thresh, text_size, hole_scale, island_scale, semantic, label_mode=label_mode, alpha=alpha, anno_mode=anno_mode, *args, **kwargs)
        elif model_name == 'sam':
            model = model_sam
            output, mask = inference_sam_m2m_auto(model, image, text_size, label_mode, alpha, anno_mode)
        elif model_name == 'seem':
            model = model_seem
            output, mask = inference_seem_pano(model, image, text_size, label_mode, alpha, anno_mode)
    return output, mask
# Function to encode the image
def encode_image(image):
    """Serialize an image array (as accepted by PIL.Image.fromarray) to a
    base64-encoded JPEG string."""
    buffer = BytesIO()
    PIL.Image.fromarray(image).save(buffer, format="JPEG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
def parse_actions_from_string(input_string):
    """Extract action dicts from a model response string.

    Fenced ```json ...``` blocks are preferred; if none exist, anonymous
    ``` ... ``` blocks are tried; failing that, the whole string must be a
    single JSON object.

    Returns:
        list[dict]: one dict per fenced block (or a single-element list for a
        bare JSON string).  NOTE: on a malformed fenced block this function
        historically returns an error *string* instead of raising; callers
        should check the return type.

    Raises:
        ValueError: if there are no fenced blocks and the whole string is not
            valid JSON.
    """
    def _load_all(blocks):
        # Parse every matched block, preserving the legacy behavior of
        # returning an error string on the first malformed block.
        actions = []
        try:
            for block in blocks:
                actions.append(json.loads(block))
            return actions
        except json.JSONDecodeError as e:
            return f"Failed to parse JSON: {e}"

    matches = re.findall(r'```json\s+(.*?)\s+```', input_string, re.DOTALL)
    if matches:
        return _load_all(matches)
    matches = re.findall(r'```\s+(.*?)\s+```', input_string, re.DOTALL)
    if matches:
        return _load_all(matches)
    try:
        return [json.loads(input_string)]
    except json.JSONDecodeError:
        raise ValueError("Invalid response format: " + input_string)
class GPT4v_Agent:
    """GPT-4V agent that acts on Set-of-Mark (SoM) annotated screenshots.

    Each observation is segmented via ``inference`` and annotated with numeric
    marks; the model is asked to CLICK a mark label or TYPE text, and CLICK
    labels are mapped back to pixel coordinates through the returned masks.
    """

    def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300):
        # instruction: natural-language task the agent should accomplish.
        self.instruction = instruction
        self.model = model
        self.max_tokens = max_tokens
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }
        # Conversation history sent to the chat-completions endpoint;
        # starts with the system prompt describing the action space.
        self.trajectory = [
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": SYS_PROMPT
                    },
                ]
            }
        ]

    def predict(self, obs):
        """Annotate *obs* with SoM marks, query the model, and return actions.

        Returns a list of action dicts, or None when the response cannot be
        parsed.
        """
        # slider=3.0 selects the SAM branch in inference(); marks are drawn
        # as numbered boxes on the screenshot.
        obs, mask = inference(obs, slider=3.0, mode="Automatic", alpha=0.1, label_mode="Number", anno_mode=["Mark", "Box"])
        # Debug copy of the annotated frame written to the working directory.
        PIL.Image.fromarray(obs).save("desktop.jpeg")
        base64_image = encode_image(obs)
        self.trajectory.append({
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What's the next step for instruction '{}'?".format(self.instruction)
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                }
            ]
        })
        # Print a compact view of the conversation (text only, images elided).
        traj_to_show = []
        for i in range(len(self.trajectory)):
            traj_to_show.append(self.trajectory[i]["content"][0]["text"])
            if len(self.trajectory[i]["content"]) > 1:
                traj_to_show.append("screenshot_obs")
        print("Trajectory:", traj_to_show)
        payload = {
            "model": self.model,
            "messages": self.trajectory,
            "max_tokens": self.max_tokens
        }
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, json=payload)
        try:
            actions = self.parse_actions(response.json()['choices'][0]['message']['content'], mask)
        # NOTE(review): bare except also swallows network/JSON errors, not just
        # parse failures — consider narrowing.
        except:
            print("Failed to parse action from response:", response.json()['choices'][0]['message']['content'])
            actions = None
        return actions

    def parse_actions(self, response: str, mask):
        """Parse *response* into executable actions and log it to the trajectory.

        Expected response shape is a fenced JSON block, e.g.::

            ```json
            {
                "action_type": "CLICK",
                "label": 7
            }
            ```

        CLICK labels are mapped to the center of the corresponding mark's
        bounding box in *mask*; TYPE actions pass their text through.
        """
        # parse from the response
        actions = parse_actions_from_string(response)
        print(actions)
        # add action into the trajectory
        self.trajectory.append({
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": response
                },
            ]
        })
        # Translate model-level actions into coordinate-level ones.
        parsed_actions = []
        for action in actions:
            action_type = action['action_type']
            if action_type == "CLICK":
                # Marks are 1-indexed in the prompt; the mask list is 0-indexed.
                label = int(action['label'])
                x, y, w, h = mask[label - 1]['bbox']
                # Click the center of the mark's bounding box.
                parsed_actions.append({"action_type": action_type, "x": int(x + w // 2), "y": int(y + h // 2)})
            if action_type == "TYPE":
                parsed_actions.append({"action_type": action_type, "text": action["text"]})
        return parsed_actions
# Smoke test: annotate a local screenshot and print the predicted actions.
if __name__ == '__main__':
    # OpenAI API Key
    api_key = os.environ.get("OPENAI_API_KEY")
    agent = GPT4v_Agent(api_key=api_key, instruction="Open Firefox")
    # NOTE(review): predict() feeds obs into PIL.Image.fromarray via
    # inference(); a PIL Image may need converting to an array first — confirm.
    obs = PIL.Image.open('desktop.png')
    print(agent.predict(obs=obs))

View File

@@ -41,10 +41,12 @@ def filter_nodes(nodes):
elif node.tag == 'text':
continue
else:
coords = tuple(map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', ')))
coords = tuple(
map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', ')))
if coords[0] < 0 or coords[1] < 0:
continue
size = tuple(map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', ')))
size = tuple(
map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', ')))
if size[0] <= 0 or size[1] <= 0:
continue
# Node is not a 'panel', add to the list.
@@ -57,17 +59,20 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
# Load the screenshot image
image = Image.open(image_file_path)
draw = ImageDraw.Draw(image)
marks = []
drew_nodes = []
# Optional: Load a font. If you don't specify a font, a default one will be used.
try:
# Adjust the path to the font file you have or use a default one
font = ImageFont.truetype("arial.ttf", 20)
font = ImageFont.truetype("arial.ttf", 15)
except IOError:
# Fallback to a basic font if the specified font can't be loaded
font = ImageFont.load_default()
index = 1
# Loop over all the visible nodes and draw their bounding boxes
for index, _node in enumerate(nodes):
for _node in nodes:
coords_str = _node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord')
size_str = _node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size')
@@ -88,15 +93,45 @@ def draw_bounding_boxes(nodes, image_file_path, output_image_file_path):
if bottom_right[0] < coords[0] or bottom_right[1] < coords[1]:
raise ValueError(f"Invalid coordinates or size, coords: {coords}, size: {size}")
# Draw rectangle on image
draw.rectangle([coords, bottom_right], outline="red", width=2)
# Check if the area only contains one color
cropped_image = image.crop((*coords, *bottom_right))
if len(set(list(cropped_image.getdata()))) == 1:
continue
# Draw index number at the bottom left of the bounding box
# Draw rectangle on image
draw.rectangle([coords, bottom_right], outline="red", width=1)
# Draw index number at the bottom left of the bounding box with black background
text_position = (coords[0], bottom_right[1]) # Adjust Y to be above the bottom right
draw.text(text_position, str(index), font=font, fill="purple")
draw.rectangle([text_position, (text_position[0] + 25, text_position[1] + 18)], fill='black')
draw.text(text_position, str(index), font=font, fill="white")
index += 1
# each mark is an x, y, w, h tuple
marks.append([coords[0], coords[1], size[0], size[1]])
drew_nodes.append(_node)
except ValueError as e:
pass
# Save the result
image.save(output_image_file_path)
return marks, drew_nodes
def print_nodes_with_indent(nodes, indent=0):
for node in nodes:
print(' ' * indent, node.tag, node.attrib)
print_nodes_with_indent(node, indent + 2)
if __name__ == '__main__':
with open('chrome_desktop_example_1.xml', 'r', encoding='utf-8') as f:
xml_file_str = f.read()
filtered_nodes = filter_nodes(find_leaf_nodes(xml_file_str))
print(len(filtered_nodes))
masks = draw_bounding_boxes(filtered_nodes, 'screenshot.png',
'chrome_desktop_example_1_tagged_remove.png', )
# print(masks)
print(len(masks))

View File

@@ -1,3 +1,5 @@
# todo: needs to be refactored
import time
from typing import Dict, List

View File

@@ -1,3 +1,5 @@
# todo: needs to be refactored
import time
from typing import Dict, List

View File

@@ -1,195 +0,0 @@
import base64
import json
import re
import time
from typing import Dict, List
import requests
from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes
from mm_agents.gpt_4_prompt_action import SYS_PROMPT as SYS_PROMPT_ACTION
from mm_agents.gpt_4_prompt_code import SYS_PROMPT as SYS_PROMPT_CODE
# Function to encode the image
def encode_image(image_path):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
def parse_actions_from_string(input_string):
    """Extract JSON action dict(s) from a model response.

    Tries, in order: json-fenced blocks, plain fenced blocks, and finally the
    whole string as bare JSON.  On a decode failure inside a fenced block an
    error-message string is returned instead of a list (legacy behaviour,
    preserved); a bare-JSON failure raises ValueError.
    """
    json_fenced = re.findall(r'```json\s+(.*?)\s+```', input_string, re.DOTALL)
    if json_fenced:
        parsed = []
        try:
            for blob in json_fenced:
                parsed.append(json.loads(blob))
            return parsed
        except json.JSONDecodeError as e:
            return f"Failed to parse JSON: {e}"

    plain_fenced = re.findall(r'```\s+(.*?)\s+```', input_string, re.DOTALL)
    if plain_fenced:
        parsed = []
        try:
            for blob in plain_fenced:
                parsed.append(json.loads(blob))
            return parsed
        except json.JSONDecodeError as e:
            return f"Failed to parse JSON: {e}"

    # No fences at all: the whole response must be a single JSON object.
    try:
        return [json.loads(input_string)]
    except json.JSONDecodeError:
        raise ValueError("Invalid response format: " + input_string)
def parse_code_from_string(input_string):
    """Pull executable snippets out of a model response.

    Matches both anonymous and language-tagged fences (e.g. ```python ...```)
    via a single regex; re.DOTALL lets a snippet span multiple lines.  The
    sentinel commands WAIT / DONE / FAIL become their own entries when they
    appear alone or as the trailing line of a snippet.
    """
    fence_pattern = r"```(?:\w+\s+)?(.*?)```"
    snippets = re.findall(fence_pattern, input_string, re.DOTALL)

    commands = ['WAIT', 'DONE', 'FAIL']  # fixme: updates this part when we have more commands
    extracted = []
    for snippet in snippets:
        snippet = snippet.strip()
        last_line = snippet.split('\n')[-1]
        if snippet in commands:
            extracted.append(snippet)
        elif last_line in commands:
            # Split the trailing sentinel off from the code that precedes it.
            if len(snippet.split('\n')) > 1:
                extracted.append("\n".join(snippet.split('\n')[:-1]))
                extracted.append(last_line)
        else:
            extracted.append(snippet)
    return extracted
class GPT4_Agent:
    """Text-only GPT-4 agent for desktop automation.

    Observes the desktop through a linearised accessibility tree and asks the
    OpenAI chat-completions API for the next action.  Depending on
    ``action_space`` the model is prompted to reply with JSON action dicts
    ("computer_13") or pyautogui code ("pyautogui").
    """

    def __init__(self, api_key, instruction, model="gpt-4-1106-preview", max_tokens=600, action_space="computer_13"):
        self.instruction = instruction
        self.model = model
        self.max_tokens = max_tokens
        self.action_space = action_space
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }

        # Conversation history sent to the API on every call, seeded with the
        # action-space-specific system prompt plus the task instruction.
        self.trajectory = [
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": {
                                    "computer_13": SYS_PROMPT_ACTION,
                                    "pyautogui": SYS_PROMPT_CODE
                                }[action_space] + "\nHere is the instruction for the task: {}".format(self.instruction)
                    },
                ]
            }
        ]

    def predict(self, obs: Dict) -> List:
        """
        Predict the next action(s) based on the current observation.

        Returns the parsed action list, or None when the model's reply cannot
        be parsed.
        """
        accessibility_tree = obs["accessibility_tree"]
        leaf_nodes = find_leaf_nodes(accessibility_tree)
        filtered_nodes = filter_nodes(leaf_nodes)

        linearized_accessibility_tree = "tag\ttext\tposition\tsize\n"
        # Linearize the accessibility tree nodes into a table format.
        # attrib.get() returns None for a missing attribute and None + str
        # raises TypeError, so fall back to "" for each field.
        for node in filtered_nodes:
            linearized_accessibility_tree += node.tag + "\t"
            linearized_accessibility_tree += (node.attrib.get('name') or '') + "\t"
            linearized_accessibility_tree += (node.attrib.get(
                '{uri:deskat:component.at-spi.gnome.org}screencoord') or '') + "\t"
            linearized_accessibility_tree += (node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size') or '') + "\n"

        self.trajectory.append({
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Given the XML format of accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
                        linearized_accessibility_tree)
                }
            ]
        })

        payload = {
            "model": self.model,
            "messages": self.trajectory,
            "max_tokens": self.max_tokens
        }

        # Retry only on transport-level failures.  The previous bare `except:`
        # also swallowed KeyboardInterrupt, making Ctrl-C loop forever, and the
        # call had no timeout so a hung connection blocked indefinitely.
        while True:
            try:
                response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers,
                                         json=payload, timeout=120)
                break
            except requests.exceptions.RequestException:
                print("Failed to generate response, retrying...")
                time.sleep(5)

        try:
            actions = self.parse_actions(response.json()['choices'][0]['message']['content'])
        except Exception:
            # Error payloads (no 'choices') and unparsable replies land here.
            print("Failed to parse action from response:", response.json())
            actions = None

        return actions

    def parse_actions(self, response: str):
        """Parse the raw model reply into actions and record the reply in the
        trajectory as the assistant turn."""
        if self.action_space == "computer_13":
            actions = parse_actions_from_string(response)
        elif self.action_space == "pyautogui":
            actions = parse_code_from_string(response)
        else:
            raise ValueError("Invalid action space: " + self.action_space)

        # add action into the trajectory
        self.trajectory.append({
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": response
                },
            ]
        })

        return actions

View File

@@ -1,244 +0,0 @@
SYS_PROMPT = """
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
For each step, you will get an observation of the desktop by the XML format of accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the accessibility tree.
HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters:
ACTION_SPACE = [
{
"action_type": "MOVE_TO",
"note": "move the cursor to the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": False,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": False,
}
}
},
{
"action_type": "CLICK",
"note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
},
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
},
"num_clicks": {
"type": int,
"range": [1, 2, 3],
"optional": True,
},
}
},
{
"action_type": "MOUSE_DOWN",
"note": "press the left button if the button not specified, otherwise press the specified button",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
}
}
},
{
"action_type": "MOUSE_UP",
"note": "release the left button if the button not specified, otherwise release the specified button",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
}
}
},
{
"action_type": "RIGHT_CLICK",
"note": "right click at the current position if x and y are not specified, otherwise right click at the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
}
}
},
{
"action_type": "DOUBLE_CLICK",
"note": "double click at the current position if x and y are not specified, otherwise double click at the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
}
}
},
{
"action_type": "DRAG_TO",
"note": "drag the cursor to the specified position with the left button pressed",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": False,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": False,
}
}
},
{
"action_type": "SCROLL",
"note": "scroll the mouse wheel up or down",
"parameters": {
"dx": {
"type": int,
"range": None,
"optional": False,
},
"dy": {
"type": int,
"range": None,
"optional": False,
}
}
},
{
"action_type": "TYPING",
"note": "type the specified text",
"parameters": {
"text": {
"type": str,
"range": None,
"optional": False,
}
}
},
{
"action_type": "PRESS",
"note": "press the specified key and release it",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "KEY_DOWN",
"note": "press the specified key",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "KEY_UP",
"note": "release the specified key",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "HOTKEY",
"note": "press the specified key combination",
"parameters": {
"keys": {
"type": list,
"range": [KEYBOARD_KEYS],
"optional": False,
}
}
},
############################################################################################################
{
"action_type": "WAIT",
"note": "wait until the next action",
},
{
"action_type": "FAIL",
"note": "decide the task can not be performed",
},
{
"action_type": "DONE",
"note": "decide the task is done",
}
]
Firstly you need to predict the class of your action, then you need to predict the parameters of your action:
- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080)
for example, format as:
```
{
"action_type": "MOUSE_MOVE",
"x": 1319.11,
"y": 65.06
}
```
- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse:
for example, format as:
```
{
"action_type": "CLICK",
"click_type": "LEFT"
}
```
- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard
for example, format as:
```
{
"action_type": "KEY",
"key": "ctrl+c"
}
```
- For TYPE, you need to specify the text you want to type
for example, format as:
```
{
"action_type": "TYPE",
"text": "hello world"
}
```
REMEMBER:
For every step, you should only RETURN ME THE action_type AND parameters I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
You MUST wrap the dict with backticks (\`).
You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty.
You CAN predict multiple actions at one step, but you should only return one action for each step.
"""

View File

@@ -1,18 +0,0 @@
# System prompt for the accessibility-tree-observation / pyautogui-code-output
# GPT-4 agent.  Sent verbatim to the model as the "system" message; the
# WAIT / FAIL / DONE sentinels it describes must stay in sync with
# parse_code_from_string's command list.
SYS_PROMPT = """
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
For each step, you will get an observation of the desktop by the XML format of accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the accessibility tree.
You are required to use `pyautogui` to perform the action.
Return one line or multiple lines of python code to perform the action each time, be time efficient.
You ONLY need to return the code inside a code block, like this:
```python
# your code here
```
Specially, it is also allowed to return the following special code:
When you think you have to wait for some time, return ```WAIT```;
When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task;
When you think the task is done, return ```DONE```.
First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
"""

View File

@@ -1,13 +1,26 @@
import base64
import json
import os
import re
import time
import uuid
from typing import Dict, List
import backoff
import requests
from openai.error import (
APIConnectionError,
APIError,
RateLimitError,
ServiceUnavailableError,
InvalidRequestError
)
from mm_agents.gpt_4v_prompt_action import SYS_PROMPT as SYS_PROMPT_ACTION
from mm_agents.gpt_4v_prompt_code import SYS_PROMPT as SYS_PROMPT_CODE
from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes, draw_bounding_boxes
from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION, \
SYS_PROMPT_IN_A11Y_OUT_CODE, SYS_PROMPT_IN_A11Y_OUT_ACTION, \
SYS_PROMPT_IN_BOTH_OUT_CODE, SYS_PROMPT_IN_BOTH_OUT_ACTION, \
SYS_PROMPT_IN_SOM_A11Y_OUT_TAG, \
SYS_PROMPT_SEEACT, ACTION_DESCRIPTION_PROMPT_SEEACT, ACTION_GROUNDING_PROMPT_SEEACT
# Function to encode the image
@@ -16,6 +29,35 @@ def encode_image(image_path):
return base64.b64encode(image_file.read()).decode('utf-8')
def linearize_accessibility_tree(accessibility_tree):
    """Flatten the filtered leaf nodes of an accessibility tree into a
    tab-separated table (tag, text, position, size), one node per line,
    with a header row.

    Args:
        accessibility_tree: XML string of the AT-SPI accessibility tree.

    Returns:
        str: the tab-separated table, newline-terminated.
    """
    leaf_nodes = find_leaf_nodes(accessibility_tree)
    filtered_nodes = filter_nodes(leaf_nodes)

    linearized_accessibility_tree = "tag\ttext\tposition\tsize\n"
    # Linearize the accessibility tree nodes into a table format.
    # attrib.get() returns None when an attribute is missing and None + str
    # raises TypeError, so each field falls back to "".
    for node in filtered_nodes:
        linearized_accessibility_tree += node.tag + "\t"
        linearized_accessibility_tree += (node.attrib.get('name') or '') + "\t"
        linearized_accessibility_tree += (node.attrib.get(
            '{uri:deskat:component.at-spi.gnome.org}screencoord') or '') + "\t"
        linearized_accessibility_tree += (node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size') or '') + "\n"

    return linearized_accessibility_tree
def tag_screenshot(screenshot, accessibility_tree):
    """Produce a Set-of-Mark tagged copy of *screenshot*.

    Draws bounding boxes for the filtered leaf nodes of *accessibility_tree*
    onto the screenshot and saves the result under tmp/images with a random
    (uuid) file name, so concurrent calls never clobber each other's output.

    Returns:
        (marks, drew_nodes, tagged_screenshot_file_path)
    """
    os.makedirs("tmp/images", exist_ok=True)
    tagged_screenshot_file_path = os.path.join("tmp/images", str(uuid.uuid4()) + ".png")

    candidate_nodes = filter_nodes(find_leaf_nodes(accessibility_tree))
    # Make tag screenshot
    marks, drew_nodes = draw_bounding_boxes(candidate_nodes, screenshot, tagged_screenshot_file_path)

    return marks, drew_nodes, tagged_screenshot_file_path
def parse_actions_from_string(input_string):
# Search for a JSON string within the input string
actions = []
@@ -60,106 +102,424 @@ def parse_code_from_string(input_string):
# so the code inside backticks can span multiple lines.
# matches now contains all the captured code snippets
return matches
codes = []
for match in matches:
match = match.strip()
commands = ['WAIT', 'DONE', 'FAIL'] # fixme: updates this part when we have more commands
if match in commands:
codes.append(match.strip())
elif match.split('\n')[-1] in commands:
if len(match.split('\n')) > 1:
codes.append("\n".join(match.split('\n')[:-1]))
codes.append(match.split('\n')[-1])
else:
codes.append(match)
return codes
def parse_code_from_som_string(input_string, masks):
    """Resolve Set-of-Mark tag references before parsing pyautogui code.

    Each "tag#N" token in *input_string* (N is 1-based) is replaced with the
    centre coordinates of the N-th mask (an x, y, w, h tuple).  Substitution
    runs highest index first so "tag#12" is rewritten before "tag#1" could
    clobber its prefix; the rewritten string is then parsed as ordinary code.
    """
    substitutions = [
        ("tag#" + str(idx + 1), "{}, {}".format(int(x + w // 2), int(y + h // 2)))
        for idx, (x, y, w, h) in enumerate(masks)
    ]
    for tag_token, centre in reversed(substitutions):
        input_string = input_string.replace(tag_token, centre)
    return parse_code_from_string(input_string)
class GPT4v_Agent:
def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300, action_space="computer_13"):
def __init__(
self,
api_key,
instruction,
model="gpt-4-vision-preview",
max_tokens=500,
action_space="computer_13",
exp="screenshot_a11y_tree"
# exp can be in ["screenshot", "a11y_tree", "screenshot_a11y_tree", "som", "seeact"]
):
self.instruction = instruction
self.model = model
self.max_tokens = max_tokens
self.action_space = action_space
self.exp = exp
self.max_trajectory_length = 3
self.headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"
}
self.trajectory = [
{
"role": "system",
"content": [
{
"type": "text",
"text": {
"computer_13": SYS_PROMPT_ACTION,
"pyautogui": SYS_PROMPT_CODE
}[action_space] + "\nHere is the instruction for the task: {}".format(self.instruction)
},
]
}
]
self.actions = []
self.observations = []
if exp == "screenshot":
if action_space == "computer_13":
self.system_message = SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION
elif action_space == "pyautogui":
self.system_message = SYS_PROMPT_IN_SCREENSHOT_OUT_CODE
else:
raise ValueError("Invalid action space: " + action_space)
elif exp == "a11y_tree":
if action_space == "computer_13":
self.system_message = SYS_PROMPT_IN_A11Y_OUT_ACTION
elif action_space == "pyautogui":
self.system_message = SYS_PROMPT_IN_A11Y_OUT_CODE
else:
raise ValueError("Invalid action space: " + action_space)
elif exp == "both":
if action_space == "computer_13":
self.system_message = SYS_PROMPT_IN_BOTH_OUT_ACTION
elif action_space == "pyautogui":
self.system_message = SYS_PROMPT_IN_BOTH_OUT_CODE
else:
raise ValueError("Invalid action space: " + action_space)
elif exp == "som":
if action_space == "computer_13":
raise ValueError("Invalid action space: " + action_space)
elif action_space == "pyautogui":
self.system_message = SYS_PROMPT_IN_SOM_A11Y_OUT_TAG
else:
raise ValueError("Invalid action space: " + action_space)
elif exp == "seeact":
if action_space == "computer_13":
raise ValueError("Invalid action space: " + action_space)
elif action_space == "pyautogui":
self.system_message = SYS_PROMPT_SEEACT
else:
raise ValueError("Invalid action space: " + action_space)
else:
raise ValueError("Invalid experiment type: " + exp)
self.system_message = self.system_message + "\nYou are asked to complete the following task: {}".format(
self.instruction)
def predict(self, obs: Dict) -> List:
"""
Predict the next action(s) based on the current observation.
"""
base64_image = encode_image(obs["screenshot"])
self.trajectory.append({
"role": "user",
# Prepare the payload for the API call
messages = []
masks = None
messages.append({
"role": "system",
"content": [
{
"type": "text",
"text": "What's the next step that you will do to help with the task?"
"text": self.system_message
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
}
]
})
traj_to_show = []
for i in range(len(self.trajectory)):
traj_to_show.append(self.trajectory[i]["content"][0]["text"])
if len(self.trajectory[i]["content"]) > 1:
traj_to_show.append("screenshot_obs")
# Append trajectory
assert len(self.observations) == len(self.actions), "The number of observations and actions should be the same."
print("Trajectory:", traj_to_show)
if len(self.observations) > self.max_trajectory_length:
_observations = self.observations[-self.max_trajectory_length:]
_actions = self.actions[-self.max_trajectory_length:]
else:
_observations = self.observations
_actions = self.actions
payload = {
for previous_obs, previous_action in zip(_observations, _actions):
if self.exp == "both":
_screenshot = previous_obs["screenshot"]
_linearized_accessibility_tree = previous_obs["accessibility_tree"]
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": "Given the screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
_linearized_accessibility_tree)
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{_screenshot}",
"detail": "high"
}
}
]
})
elif self.exp in ["som", "seeact"]:
_screenshot = previous_obs["screenshot"]
_linearized_accessibility_tree = previous_obs["accessibility_tree"]
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": "Given the tagged screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
_linearized_accessibility_tree)
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{_screenshot}",
"detail": "high"
}
}
]
})
elif self.exp == "screenshot":
_screenshot = previous_obs["screenshot"]
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": "Given the screenshot as below. What's the next step that you will do to help with the task?"
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{_screenshot}",
"detail": "high"
}
}
]
})
elif self.exp == "a11y_tree":
_linearized_accessibility_tree = previous_obs["accessibility_tree"]
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": "Given the info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
_linearized_accessibility_tree)
}
]
})
else:
raise ValueError("Invalid experiment type: " + self.exp)
messages.append({
"role": "assistant",
"content": [
{
"type": "text",
"text": "\n".join(previous_action) if len(previous_action) > 0 else "No valid action"
},
]
})
if self.exp in ["screenshot", "both"]:
base64_image = encode_image(obs["screenshot"])
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
if self.exp == "both":
self.observations.append({
"screenshot": base64_image,
"accessibility_tree": linearized_accessibility_tree
})
else:
self.observations.append({
"screenshot": base64_image,
"accessibility_tree": None
})
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": "Given the screenshot as below. What's the next step that you will do to help with the task?"
if self.exp == "screenshot"
else "Given the screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
linearized_accessibility_tree)
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}",
"detail": "high"
}
}
]
})
elif self.exp == "a11y_tree":
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
self.observations.append({
"screenshot": None,
"accessibility_tree": linearized_accessibility_tree
})
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": "Given the info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
linearized_accessibility_tree)
}
]
})
elif self.exp == "som":
# Add som to the screenshot
masks, drew_nodes, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"])
base64_image = encode_image(tagged_screenshot)
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
self.observations.append({
"screenshot": base64_image,
"accessibility_tree": linearized_accessibility_tree
})
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": "Given the tagged screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
linearized_accessibility_tree)
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}",
"detail": "high"
}
}
]
})
elif self.exp == "seeact":
# Add som to the screenshot
masks, drew_nodes, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"])
base64_image = encode_image(tagged_screenshot)
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
self.observations.append({
"screenshot": base64_image,
"accessibility_tree": linearized_accessibility_tree
})
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": ACTION_DESCRIPTION_PROMPT_SEEACT.format(linearized_accessibility_tree)
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}",
"detail": "high"
}
}
]
})
else:
raise ValueError("Invalid experiment type: " + self.exp)
with open("messages.json", "w") as f:
f.write(json.dumps(messages, indent=4))
response = self.call_llm({
"model": self.model,
"messages": self.trajectory,
"messages": messages,
"max_tokens": self.max_tokens
}
})
print(response)
if self.exp == "seeact":
messages.append({
"role": "assistant",
"content": [
{
"type": "text",
"text": response
}
]
})
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": "{}\n\nWhat's the next step that you will do to help with the task?".format(
ACTION_GROUNDING_PROMPT_SEEACT)
}
]
})
response = self.call_llm({
"model": self.model,
"messages": messages,
"max_tokens": self.max_tokens
})
print(response)
while True:
try:
response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers,
json=payload)
break
except:
print("Failed to generate response, retrying...")
time.sleep(5)
pass
try:
actions = self.parse_actions(response.json()['choices'][0]['message']['content'])
except:
print("Failed to parse action from response:", response.json())
actions = self.parse_actions(response, masks)
except Exception as e:
print("Failed to parse action from response", e)
actions = None
return actions
def parse_actions(self, response: str):
# parse from the response
if self.action_space == "computer_13":
actions = parse_actions_from_string(response)
elif self.action_space == "pyautogui":
actions = parse_code_from_string(response)
@backoff.on_exception(
backoff.expo,
(APIError, RateLimitError, APIConnectionError, ServiceUnavailableError, InvalidRequestError),
)
def call_llm(self, payload):
response = requests.post(
"https://api.openai.com/v1/chat/completions",
headers=self.headers,
json=payload
)
if response.status_code != 200:
print("Failed to call LLM: " + response.text)
return ""
else:
raise ValueError("Invalid action space: " + self.action_space)
return response.json()['choices'][0]['message']['content']
# add action into the trajectory
self.trajectory.append({
"role": "assistant",
"content": [
{
"type": "text",
"text": response
},
]
})
def parse_actions(self, response: str, masks=None):
return actions
if self.exp in ["screenshot", "a11y_tree", "both"]:
# parse from the response
if self.action_space == "computer_13":
actions = parse_actions_from_string(response)
elif self.action_space == "pyautogui":
actions = parse_code_from_string(response)
else:
raise ValueError("Invalid action space: " + self.action_space)
self.actions.append(actions)
return actions
elif self.exp in ["som", "seeact"]:
# parse from the response
if self.action_space == "computer_13":
raise ValueError("Invalid action space: " + self.action_space)
elif self.action_space == "pyautogui":
actions = parse_code_from_som_string(response, masks)
else:
raise ValueError("Invalid action space: " + self.action_space)
self.actions.append(actions)
return actions

View File

@@ -1,244 +0,0 @@
SYS_PROMPT = """
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image.
HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters:
ACTION_SPACE = [
{
"action_type": "MOVE_TO",
"note": "move the cursor to the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": False,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": False,
}
}
},
{
"action_type": "CLICK",
"note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
},
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
},
"num_clicks": {
"type": int,
"range": [1, 2, 3],
"optional": True,
},
}
},
{
"action_type": "MOUSE_DOWN",
"note": "press the left button if the button not specified, otherwise press the specified button",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
}
}
},
{
"action_type": "MOUSE_UP",
"note": "release the left button if the button not specified, otherwise release the specified button",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
}
}
},
{
"action_type": "RIGHT_CLICK",
"note": "right click at the current position if x and y are not specified, otherwise right click at the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
}
}
},
{
"action_type": "DOUBLE_CLICK",
"note": "double click at the current position if x and y are not specified, otherwise double click at the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
}
}
},
{
"action_type": "DRAG_TO",
"note": "drag the cursor to the specified position with the left button pressed",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": False,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": False,
}
}
},
{
"action_type": "SCROLL",
"note": "scroll the mouse wheel up or down",
"parameters": {
"dx": {
"type": int,
"range": None,
"optional": False,
},
"dy": {
"type": int,
"range": None,
"optional": False,
}
}
},
{
"action_type": "TYPING",
"note": "type the specified text",
"parameters": {
"text": {
"type": str,
"range": None,
"optional": False,
}
}
},
{
"action_type": "PRESS",
"note": "press the specified key and release it",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "KEY_DOWN",
"note": "press the specified key",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "KEY_UP",
"note": "release the specified key",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "HOTKEY",
"note": "press the specified key combination",
"parameters": {
"keys": {
"type": list,
"range": [KEYBOARD_KEYS],
"optional": False,
}
}
},
############################################################################################################
{
"action_type": "WAIT",
"note": "wait until the next action",
},
{
"action_type": "FAIL",
"note": "decide the task can not be performed",
},
{
"action_type": "DONE",
"note": "decide the task is done",
}
]
Firstly you need to predict the class of your action, then you need to predict the parameters of your action:
- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080)
for example, format as:
```
{
"action_type": "MOUSE_MOVE",
"x": 1319.11,
"y": 65.06
}
```
- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse:
for example, format as:
```
{
"action_type": "CLICK",
"click_type": "LEFT"
}
```
- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard
for example, format as:
```
{
"action_type": "KEY",
"key": "ctrl+c"
}
```
- For TYPE, you need to specify the text you want to type
for example, format as:
```
{
"action_type": "TYPE",
"text": "hello world"
}
```
REMEMBER:
For every step, you should only RETURN ME THE action_type AND parameters I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
You MUST wrap the dict with backticks (\`).
You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty.
You CAN predict multiple actions at one step, but you should only return one action for each step.
"""

View File

@@ -1,18 +0,0 @@
SYS_PROMPT = """
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
For each step, you will get an observation of an image, which is the screenshot of the computer screen and you will predict the action of the computer based on the image.
You are required to use `pyautogui` to perform the action.
Return one line or multiple lines of python code to perform the action each time, be time efficient.
You ONLY need to return the code inside a code block, like this:
```python
# your code here
```
Specially, it is also allowed to return the following special code:
When you think you have to wait for some time, return ```WAIT```;
When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task;
When you think the task is done, return ```DONE```.
First give the current screenshot and previous things we did a reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
"""

868
mm_agents/prompts.py Normal file
View File

@@ -0,0 +1,868 @@
SYS_PROMPT_IN_SCREENSHOT_OUT_CODE = """
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
For each step, you will get an observation of an image, which is the screenshot of the computer screen and you will predict the action of the computer based on the image.
You are required to use `pyautogui` to perform the action, but don't use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with.
Return one line or multiple lines of python code to perform the action each time, be time efficient.
You ONLY need to return the code inside a code block, like this:
```python
# your code here
```
Specially, it is also allowed to return the following special code:
When you think you have to wait for some time, return ```WAIT```;
When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task;
When you think the task is done, return ```DONE```.
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
""".strip()
SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION = """
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image.
HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters:
ACTION_SPACE = [
{
"action_type": "MOVE_TO",
"note": "move the cursor to the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": False,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": False,
}
}
},
{
"action_type": "CLICK",
"note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
},
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
},
"num_clicks": {
"type": int,
"range": [1, 2, 3],
"optional": True,
},
}
},
{
"action_type": "MOUSE_DOWN",
"note": "press the left button if the button not specified, otherwise press the specified button",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
}
}
},
{
"action_type": "MOUSE_UP",
"note": "release the left button if the button not specified, otherwise release the specified button",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
}
}
},
{
"action_type": "RIGHT_CLICK",
"note": "right click at the current position if x and y are not specified, otherwise right click at the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
}
}
},
{
"action_type": "DOUBLE_CLICK",
"note": "double click at the current position if x and y are not specified, otherwise double click at the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
}
}
},
{
"action_type": "DRAG_TO",
"note": "drag the cursor to the specified position with the left button pressed",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": False,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": False,
}
}
},
{
"action_type": "SCROLL",
"note": "scroll the mouse wheel up or down",
"parameters": {
"dx": {
"type": int,
"range": None,
"optional": False,
},
"dy": {
"type": int,
"range": None,
"optional": False,
}
}
},
{
"action_type": "TYPING",
"note": "type the specified text",
"parameters": {
"text": {
"type": str,
"range": None,
"optional": False,
}
}
},
{
"action_type": "PRESS",
"note": "press the specified key and release it",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "KEY_DOWN",
"note": "press the specified key",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "KEY_UP",
"note": "release the specified key",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "HOTKEY",
"note": "press the specified key combination",
"parameters": {
"keys": {
"type": list,
"range": [KEYBOARD_KEYS],
"optional": False,
}
}
},
############################################################################################################
{
"action_type": "WAIT",
"note": "wait until the next action",
},
{
"action_type": "FAIL",
"note": "decide the task can not be performed",
},
{
"action_type": "DONE",
"note": "decide the task is done",
}
]
Firstly you need to predict the class of your action, then you need to predict the parameters of your action:
- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080)
for example, format as:
```
{
"action_type": "MOUSE_MOVE",
"x": 1319.11,
"y": 65.06
}
```
- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse:
for example, format as:
```
{
"action_type": "CLICK",
"click_type": "LEFT"
}
```
- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard
for example, format as:
```
{
"action_type": "KEY",
"key": "ctrl+c"
}
```
- For TYPE, you need to specify the text you want to type
for example, format as:
```
{
"action_type": "TYPE",
"text": "hello world"
}
```
REMEMBER:
For every step, you should only RETURN ME THE action_type AND parameters I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
You MUST wrap the dict with backticks (\`).
You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty.
You CAN predict multiple actions at one step, but you should only return one action for each step.
""".strip()
SYS_PROMPT_IN_A11Y_OUT_CODE = """
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
For each step, you will get an observation of the desktop by accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the accessibility tree.
You are required to use `pyautogui` to perform the action, but don't use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with.
Return one line or multiple lines of python code to perform the action each time, be time efficient.
You ONLY need to return the code inside a code block, like this:
```python
# your code here
```
Specially, it is also allowed to return the following special code:
When you think you have to wait for some time, return ```WAIT```;
When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task;
When you think the task is done, return ```DONE```.
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
""".strip()
SYS_PROMPT_IN_A11Y_OUT_ACTION = """
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
For each step, you will get an observation of the desktop by accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the accessibility tree.
HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters:
ACTION_SPACE = [
{
"action_type": "MOVE_TO",
"note": "move the cursor to the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": False,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": False,
}
}
},
{
"action_type": "CLICK",
"note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
},
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
},
"num_clicks": {
"type": int,
"range": [1, 2, 3],
"optional": True,
},
}
},
{
"action_type": "MOUSE_DOWN",
"note": "press the left button if the button not specified, otherwise press the specified button",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
}
}
},
{
"action_type": "MOUSE_UP",
"note": "release the left button if the button not specified, otherwise release the specified button",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
}
}
},
{
"action_type": "RIGHT_CLICK",
"note": "right click at the current position if x and y are not specified, otherwise right click at the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
}
}
},
{
"action_type": "DOUBLE_CLICK",
"note": "double click at the current position if x and y are not specified, otherwise double click at the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
}
}
},
{
"action_type": "DRAG_TO",
"note": "drag the cursor to the specified position with the left button pressed",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": False,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": False,
}
}
},
{
"action_type": "SCROLL",
"note": "scroll the mouse wheel up or down",
"parameters": {
"dx": {
"type": int,
"range": None,
"optional": False,
},
"dy": {
"type": int,
"range": None,
"optional": False,
}
}
},
{
"action_type": "TYPING",
"note": "type the specified text",
"parameters": {
"text": {
"type": str,
"range": None,
"optional": False,
}
}
},
{
"action_type": "PRESS",
"note": "press the specified key and release it",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "KEY_DOWN",
"note": "press the specified key",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "KEY_UP",
"note": "release the specified key",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "HOTKEY",
"note": "press the specified key combination",
"parameters": {
"keys": {
"type": list,
"range": [KEYBOARD_KEYS],
"optional": False,
}
}
},
############################################################################################################
{
"action_type": "WAIT",
"note": "wait until the next action",
},
{
"action_type": "FAIL",
"note": "decide the task can not be performed",
},
{
"action_type": "DONE",
"note": "decide the task is done",
}
]
Firstly you need to predict the class of your action, then you need to predict the parameters of your action:
- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080)
for example, format as:
```
{
"action_type": "MOUSE_MOVE",
"x": 1319.11,
"y": 65.06
}
```
- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse:
for example, format as:
```
{
"action_type": "CLICK",
"click_type": "LEFT"
}
```
- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard
for example, format as:
```
{
"action_type": "KEY",
"key": "ctrl+c"
}
```
- For TYPE, you need to specify the text you want to type
for example, format as:
```
{
"action_type": "TYPE",
"text": "hello world"
}
```
REMEMBER:
For every step, you should only RETURN ME THE action_type AND parameters I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
You MUST wrap the dict with backticks (\`).
You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty.
You CAN predict multiple actions at one step, but you should only return one action for each step.
""".strip()
SYS_PROMPT_IN_BOTH_OUT_CODE = """
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
For each step, you will get an observation of the desktop by 1) a screenshot; and 2) accessibility tree, which is based on AT-SPI library.
And you will predict the action of the computer based on the screenshot and accessibility tree.
You are required to use `pyautogui` to perform the action, but don't use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with.
Return one line or multiple lines of python code to perform the action each time, be time efficient.
You ONLY need to return the code inside a code block, like this:
```python
# your code here
```
Specially, it is also allowed to return the following special code:
When you think you have to wait for some time, return ```WAIT```;
When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task;
When you think the task is done, return ```DONE```.
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
""".strip()
SYS_PROMPT_IN_BOTH_OUT_ACTION = """
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
For each step, you will get an observation of the desktop by 1) a screenshot; and 2) accessibility tree, which is based on AT-SPI library.
And you will predict the action of the computer based on the screenshot and accessibility tree.
HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters:
ACTION_SPACE = [
{
"action_type": "MOVE_TO",
"note": "move the cursor to the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": False,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": False,
}
}
},
{
"action_type": "CLICK",
"note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
},
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
},
"num_clicks": {
"type": int,
"range": [1, 2, 3],
"optional": True,
},
}
},
{
"action_type": "MOUSE_DOWN",
"note": "press the left button if the button not specified, otherwise press the specified button",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
}
}
},
{
"action_type": "MOUSE_UP",
"note": "release the left button if the button not specified, otherwise release the specified button",
"parameters": {
"button": {
"type": str,
"range": ["left", "right", "middle"],
"optional": True,
}
}
},
{
"action_type": "RIGHT_CLICK",
"note": "right click at the current position if x and y are not specified, otherwise right click at the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
}
}
},
{
"action_type": "DOUBLE_CLICK",
"note": "double click at the current position if x and y are not specified, otherwise double click at the specified position",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": True,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": True,
}
}
},
{
"action_type": "DRAG_TO",
"note": "drag the cursor to the specified position with the left button pressed",
"parameters": {
"x": {
"type": float,
"range": [0, X_MAX],
"optional": False,
},
"y": {
"type": float,
"range": [0, Y_MAX],
"optional": False,
}
}
},
{
"action_type": "SCROLL",
"note": "scroll the mouse wheel up or down",
"parameters": {
"dx": {
"type": int,
"range": None,
"optional": False,
},
"dy": {
"type": int,
"range": None,
"optional": False,
}
}
},
{
"action_type": "TYPING",
"note": "type the specified text",
"parameters": {
"text": {
"type": str,
"range": None,
"optional": False,
}
}
},
{
"action_type": "PRESS",
"note": "press the specified key and release it",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "KEY_DOWN",
"note": "press the specified key",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "KEY_UP",
"note": "release the specified key",
"parameters": {
"key": {
"type": str,
"range": KEYBOARD_KEYS,
"optional": False,
}
}
},
{
"action_type": "HOTKEY",
"note": "press the specified key combination",
"parameters": {
"keys": {
"type": list,
"range": [KEYBOARD_KEYS],
"optional": False,
}
}
},
############################################################################################################
{
"action_type": "WAIT",
"note": "wait until the next action",
},
{
"action_type": "FAIL",
"note": "decide the task can not be performed",
},
{
"action_type": "DONE",
"note": "decide the task is done",
}
]
Firstly you need to predict the class of your action, then you need to predict the parameters of your action:
- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080)
for example, format as:
```
{
"action_type": "MOUSE_MOVE",
"x": 1319.11,
"y": 65.06
}
```
- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse:
for example, format as:
```
{
"action_type": "CLICK",
"click_type": "LEFT"
}
```
- For [KEY, KEY_DOWN, KEY_UP], you need to choose a(multiple) key(s) from the keyboard
for example, format as:
```
{
"action_type": "KEY",
"key": "ctrl+c"
}
```
- For TYPE, you need to specify the text you want to type
for example, format as:
```
{
"action_type": "TYPE",
"text": "hello world"
}
```
REMEMBER:
For every step, you should only RETURN ME THE action_type AND parameters I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
You MUST wrap the dict with backticks (\`).
You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty.
You CAN predict multiple actions at one step, but you should only return one action for each step.
""".strip()
SYS_PROMPT_IN_SOM_A11Y_OUT_TAG = """
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
For each step, you will get an observation of the desktop by 1) a screenshot; and 2) accessibility tree, which is based on AT-SPI library.
You are required to use `pyautogui` to perform the action, but don't use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with.
You can replace x, y in the code with the tag of the element you want to operate with. such as:
```python
pyautogui.moveTo(tag#3)
pyautogui.click(tag#2)
pyautogui.dragTo(tag#1, button='left')
```
When you think you can directly output precise x and y coordinates or there is no tag on which you want to interact, you can also use them directly.
But you should be careful to ensure that the coordinates are correct.
Return one line or multiple lines of python code to perform the action each time, be time efficient.
You ONLY need to return the code inside a code block, like this:
```python
# your code here
```
Specially, it is also allowed to return the following special code:
When you think you have to wait for some time, return ```WAIT```;
When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task;
When you think the task is done, return ```DONE```.
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
""".strip()
SYS_PROMPT_SEEACT = """
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
For each step, you will get an observation of an image, which is the screenshot of the computer screen and you will predict the action of the computer based on the image.
""".strip()
ACTION_DESCRIPTION_PROMPT_SEEACT = """
The text and image shown below is the observation of the desktop by 1) a screenshot; and 2) accessibility tree, which is based on AT-SPI library.
{}
Follow the following guidance to think step by step before outlining the next action step at the current stage:
(Current Screenshot Identification)
Firstly, think about what the current screenshot is.
(Previous Action Analysis)
Secondly, combined with the screenshot, analyze each step of the previous action history and their intention one by one. Particularly, pay more attention to the last step, which may be more related to what you should do now as the next step.
(Screenshot Details Analysis)
Closely examine the screenshot to check the status of every part of the webpage to understand what you can operate with and what has been set or completed. You should closely examine the screenshot details to see what steps have been completed by previous actions even though you are given the textual previous actions. Because the textual history may not clearly and sufficiently record some effects of previous actions, you should closely evaluate the status of every part of the webpage to understand what you have done.
(Next Action Based on Screenshot and Analysis)
Then, based on your analysis, in conjunction with human desktop using habits and the logic of app GUI design, decide on the following action. And clearly outline which button in the screenshot users will operate with as the first next target element, its detailed location, and the corresponding operation.
"""
ACTION_GROUNDING_PROMPT_SEEACT = """
You are required to use `pyautogui` to perform the action, but don't use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with.
You can replace x, y in the code with the tag of the element you want to operate with. such as:
```python
pyautogui.moveTo(tag#3)
pyautogui.click(tag#2)
pyautogui.dragTo(tag#1, button='left')
```
When you think you can directly output precise x and y coordinates or there is no tag on which you want to interact, you can also use them directly.
But you should be careful to ensure that the coordinates are correct.
Return one line or multiple lines of python code to perform the action each time, be time efficient.
You ONLY need to return the code inside a code block, like this:
```python
# your code here
```
Specially, it is also allowed to return the following special code:
When you think you have to wait for some time, return ```WAIT```;
When you think the task can not be done, return ```FAIL```, don't easily say ```FAIL```, try your best to do the task;
When you think the task is done, return ```DONE```.
First give the current screenshot and previous things we did a short reflection, then RETURN ME THE CODE OR SPECIAL CODE I ASKED FOR. NEVER EVER RETURN ME ANYTHING ELSE.
"""

View File

@@ -1,124 +0,0 @@
import torch
from PIL import Image
import requests
from transformers import SamModel, SamProcessor
import numpy as np
import matplotlib.pyplot as plt
import os

# Avoid "libiomp5 already initialized" aborts when torch and matplotlib each
# load an OpenMP runtime on the same process.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


def show_mask(mask, ax, random_color=False):
    """Overlay one segmentation mask on *ax* as a translucent color layer."""
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)


def show_box(box, ax):
    """Draw a single (x0, y0, x1, y1) bounding-box outline on *ax*."""
    x0, y0 = box[0], box[1]
    w, h = box[2] - box[0], box[3] - box[1]
    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0, 0, 0, 0), lw=2))


def show_boxes_on_image(raw_image, boxes):
    """Display *raw_image* with every box in *boxes* drawn on top."""
    plt.figure(figsize=(10, 10))
    plt.imshow(raw_image)
    for box in boxes:
        show_box(box, plt.gca())
    plt.axis('on')
    plt.show()


def show_points_on_image(raw_image, input_points, input_labels=None):
    """Display *raw_image* with prompt points; labels default to all-positive (1)."""
    plt.figure(figsize=(10, 10))
    plt.imshow(raw_image)
    input_points = np.array(input_points)
    if input_labels is None:
        labels = np.ones_like(input_points[:, 0])
    else:
        labels = np.array(input_labels)
    show_points(input_points, labels, plt.gca())
    plt.axis('on')
    plt.show()


def show_points_and_boxes_on_image(raw_image, boxes, input_points, input_labels=None):
    """Display *raw_image* with both prompt points and boxes.

    Fix: the original file defined this function twice with identical bodies;
    the redundant duplicate definition has been removed.
    """
    plt.figure(figsize=(10, 10))
    plt.imshow(raw_image)
    input_points = np.array(input_points)
    if input_labels is None:
        labels = np.ones_like(input_points[:, 0])
    else:
        labels = np.array(input_labels)
    show_points(input_points, labels, plt.gca())
    for box in boxes:
        show_box(box, plt.gca())
    plt.axis('on')
    plt.show()


def show_points(coords, labels, ax, marker_size=375):
    """Scatter positive (label == 1, green) and negative (label == 0, red) prompt points."""
    pos_points = coords[labels == 1]
    neg_points = coords[labels == 0]
    ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white',
               linewidth=1.25)
    ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white',
               linewidth=1.25)


def show_masks_on_image(raw_image, masks, scores):
    """Show each predicted mask side by side with its IoU score in the title."""
    if len(masks.shape) == 4:
        masks = masks.squeeze()
    if scores.shape[0] == 1:
        scores = scores.squeeze()
    nb_predictions = scores.shape[-1]
    fig, axes = plt.subplots(1, nb_predictions, figsize=(15, 15))
    if nb_predictions == 1:
        # Fix: plt.subplots returns a bare Axes (not an array) for a single
        # column, so axes[i] below would raise TypeError.
        axes = [axes]
    for i, (mask, score) in enumerate(zip(masks, scores)):
        mask = mask.cpu().detach()
        axes[i].imshow(np.array(raw_image))
        show_mask(mask, axes[i])
        axes[i].title.set_text(f"Mask {i + 1}, Score: {score.item():.3f}")
        axes[i].axis("off")
    plt.show()


# --- Demo: run SAM (ViT-H) on a sample image and visualize the predicted masks.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device)
processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")
img_url = "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png"
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB")
plt.imshow(raw_image)
inputs = processor(raw_image, return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model(**inputs)
masks = processor.image_processor.post_process_masks(
    outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()
)
scores = outputs.iou_scores
show_masks_on_image(raw_image, masks[0], scores)

View File

@@ -32,3 +32,4 @@ librosa
pymupdf
chardet
playwright
backoff

View File

@@ -0,0 +1,268 @@
id,Source,InvolvedApp
94d95f96-9699-4208-98ba-3c3119edf9c2,https://help.ubuntu.com/lts/ubuntu-help/addremove-install.html.en,OS
bedcedc4-4d72-425e-ad62-21960b11fe0d,https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s,OS
43c2d64c-bab5-4dcb-a30c-b888321c319a,https://ubuntu.com/tutorials/command-line-for-beginners#4-creating-folders-and-files,OS
7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82,https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files,OS
ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3,https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s,OS
a462a795-fdc7-4b23-b689-e8b6df786b78,https://help.ubuntu.com/lts/ubuntu-help/shell-exit.html.en,OS
f9be0997-4b7c-45c5-b05c-4612b44a6118,https://help.ubuntu.com/lts/ubuntu-help/shell-notifications.html.en,OS
ae039631-2b12-4637-84f6-c67d51511be3,https://help.ubuntu.com/lts/ubuntu-help/net-default-browser.html.en,OS
e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15,https://help.ubuntu.com/lts/ubuntu-help/contacts-add-remove.html.en,OS
28cc3b7e-b194-4bc9-8353-d04c0f4d56d2,https://help.ubuntu.com/lts/ubuntu-help/sound-volume.html.en,OS
5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57,https://help.ubuntu.com/lts/ubuntu-help/files-recover.html.en,OS
e0df059f-28a6-4169-924f-b9623e7184cc,https://help.ubuntu.com/lts/ubuntu-help/files-rename.html.en,OS
ddc75b62-7311-4af8-bfb3-859558542b36,https://help.ubuntu.com/lts/ubuntu-help/addremove-remove.html.en,OS
5c433d22-ed9a-4e31-91f5-54cf3e8acd63,https://help.ubuntu.com/lts/ubuntu-help/session-language.html.zh-CN,OS
b6781586-6346-41cd-935a-a6b1487918fc,https://help.ubuntu.com/lts/ubuntu-help/clock-timezone.html.en,OS
b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa,https://help.ubuntu.com/lts/ubuntu-help/bluetooth-turn-on-off.html.en,OS
3ce045a0-877b-42aa-8d2c-b4a863336ab8,https://help.ubuntu.com/lts/ubuntu-help/a11y-font-size.html.en,OS
fe41f596-a71b-4c2f-9b2f-9dcd40b568c3,https://help.ubuntu.com/lts/ubuntu-help/power-percentage.html.en,OS
a4d98375-215b-4a4d-aee9-3d4370fccc41,https://help.ubuntu.com/lts/ubuntu-help/privacy-screen-lock.html.en,OS
765d2b74-88a7-4d50-bf51-34e4106fd24a,https://help.ubuntu.com/lts/ubuntu-help/files-delete.html.en,OS
cc9d4f34-1ca0-4a1b-8ff2-09302696acb9,https://superuser.com/questions/178587/how-do-i-detach-a-process-from-terminal-entirely,OS
5812b315-e7bd-4265-b51f-863c02174c28,https://superuser.com/questions/149404/create-an-ssh-user-who-only-has-permission-to-access-specific-folders,OS
c56de254-a3ec-414e-81a6-83d2ce8c41fa,https://superuser.com/questions/28426/how-to-extract-text-with-ocr-from-a-pdf-on-linux,OS
6ebbfb01-ea72-4226-a2a6-dc428e111ed2,https://superuser.com/questions/46748/how-do-i-make-bash-my-default-shell-on-ubuntu,OS
4d2b519e-e872-4100-8ea3-fe71ab0f9133,https://stackoverflow.com/questions/11530090/adding-a-new-entry-to-the-path-variable-in-zsh,OS
c288e301-e626-4b98-a1ab-159dcb162af5,https://stackoverflow.com/questions/41986507/unable-to-set-default-python-version-to-python3-in-ubuntu,OS
13584542-872b-42d8-b299-866967b5c3ef,https://superuser.com/questions/72176/linux-set-default-terminal-size-and-screen-position,OS
23393935-50c7-4a86-aeea-2b78fd089c5c,https://superuser.com/questions/91307/copying-only-jpg-from-a-directory-structure-to-another-location-linux,OS
f10b16e1-c160-4cb3-989f-7b2ec89bc073,https://www.wikihow.com/Install-Gnome-on-Ubuntu,OS
eb03d19a-b88d-4de4-8a64-ca0ac66f426b,https://www.youtube.com/shorts/t9JLUaT55UQ,MS Excel
0bf05a7d-b28b-44d2-955a-50b41e24012a,https://www.youtube.com/shorts/FPAQaDTS8VY,MS Excel
7b802dad-6e0f-4204-9815-d4e3f57627d8,https://www.youtube.com/shorts/Of-lzeP1usE,MS Excel
7a4e4bc8-922c-4c84-865c-25ba34136be1,https://www.youtube.com/shorts/bvUhr1AHs44,MS Excel
2bd59342-0664-4ccb-ba87-79379096cc08,https://www.youtube.com/shorts/L3Z-F1QTQFY,MS Excel
a9f325aa-8c05-4e4f-8341-9e4358565f4f,https://www.youtube.com/shorts/A0gmEBRKXWs,MS Excel
ecb0df7a-4e8d-4a03-b162-053391d3afaf,https://www.youtube.com/shorts/tXOovKn0H68,MS Excel
7efeb4b1-3d19-4762-b163-63328d66303b,https://www.youtube.com/shorts/4jzXfZNhfmk,MS Excel
4e6fcf72-daf3-439f-a232-c434ce416af6,https://www.youtube.com/shorts/0uxJccNCKcE,MS Excel
6054afcb-5bab-4702-90a0-b259b5d3217c,https://www.youtube.com/shorts/JTbZ8sRxkdU,MS Excel
abed40dc-063f-4598-8ba5-9fe749c0615d,https://www.youtube.com/shorts/xgf4ZpsEx5M,MS Excel
01b269ae-2111-4a07-81fd-3fcd711993b0,https://www.youtube.com/shorts/VrUzPTIwQ04,MS Excel
8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14,https://www.youtube.com/shorts/Hbcwu6IQ1ns,MS Excel
af2b02f7-acee-4be4-8b66-499fab394915,https://www.youtube.com/shorts/AwKsb5VmtBI,MS Excel
da1d63b8-fa12-417b-ba18-f748e5f770f3,https://www.youtube.com/shorts/hquscnbz2-U,MS Excel
636380ea-d5f6-4474-b6ca-b2ed578a20f1,https://www.youtube.com/shorts/_BYL6VOHLGw,"MS Excel, Edge"
5ba77536-05c5-4aae-a9ff-6e298d094c3e,https://www.youtube.com/shorts/CuBC1evUS5I,MS Excel
4bc4eaf4-ca5e-4db2-8138-8d4e65af7c0b,https://www.youtube.com/shorts/1adQWfjN-tI,MS Excel
672a1b02-c62f-4ae2-acf0-37f5fb3052b0,https://www.youtube.com/shorts/2rhdQXI4Lng,MS Excel
648fe544-16ba-44af-a587-12ccbe280ea6,https://www.youtube.com/shorts/sOPBMWaC6Uc,MS Excel
8985d1e4-5b99-4711-add4-88949ebb2308,https://www.youtube.com/shorts/J5ts2Acv9Pc,MS Excel
9e606842-2e27-43bf-b1d1-b43289c9589b,https://www.youtube.com/shorts/B-mGYDFOyUs,MS Excel
fcb6e45b-25c4-4087-9483-03d714f473a9,https://www.youtube.com/shorts/GZipp7nOZS0,MS Excel
68c0c5b7-96f3-4e87-92a7-6c1b967fd2d2,https://www.youtube.com/shorts/JEH5TsK-cCk,"MS Excel, Edge"
fff629ea-046e-4793-8eec-1a5a15c3eb35,https://www.youtube.com/shorts/8WybtCdUT6w,MS Excel
5c9a206c-bb00-4fb6-bb46-ee675c187df5,https://www.youtube.com/shorts/VbQtMNnq9i4,MS Excel
e975ae74-79bd-4672-8d1c-dc841a85781d,https://www.youtube.com/shorts/GjT7gGe5Sr8,MS Excel
34a6938a-58da-4897-8639-9b90d6db5391,https://www.youtube.com/shorts/gW37x2TkzOY,MS Excel
b5a22759-b4eb-4bf2-aeed-ad14e8615f19,https://www.youtube.com/shorts/3xLa-D0C7Ic,MS Excel
2f9913a1-51ed-4db6-bfe0-7e1c95b3139e,https://www.youtube.com/shorts/dGLRcmfVO6Q,MS Excel
2558031e-401d-4579-8e00-3ecf540fb492,https://www.mrexcel.com/board/threads/sales-for-the-first-6-weeks.1249213/,MS Excel
39aa4e37-dc91-482e-99af-132a612d40f3,https://www.libreofficehelp.com/add-insert-delete-copy-move-rename-a-worksheet-in-libreoffice-calc/,LibreOffice Calc
0cecd4f3-74de-457b-ba94-29ad6b5dafb6,https://www.libreofficehelp.com/add-insert-delete-copy-move-rename-a-worksheet-in-libreoffice-calc/,LibreOffice Calc
4188d3a4-077d-46b7-9c86-23e1a036f6c1,https://www.libreofficehelp.com/freeze-unfreeze-rows-columns-ranges-calc/,LibreOffice Calc
51b11269-2ca8-4b2a-9163-f21758420e78,https://www.reddit.com/r/LibreOfficeCalc/comments/186pcc6/how_to_arrange_numbers_in_a_column_from_minimum/,LibreOffice Calc
7e429b8d-a3f0-4ed0-9b58-08957d00b127,https://medium.com/@divyangichaudhari17/how-to-use-vlookup-and-hlookup-in-libre-calc-3370698bb3ff,LibreOffice Calc
f5a90742-3fa2-40fc-a564-f29b054e0337,https://superuser.com/questions/1236149/libreoffice-calc-how-to-apply-functions-to-columns,LibreOffice Calc
22df9241-f8d7-4509-b7f1-37e501a823f7,https://superuser.com/questions/1767185/how-do-you-move-cells-in-libreoffice-calc,LibreOffice Calc
1434ca3e-f9e3-4db8-9ca7-b4c653be7d17,https://www.wikihow.com/Remove-Duplicates-in-Open-Office-Calc,LibreOffice Calc
347ef137-7eeb-4c80-a3bb-0951f26a8aff,https://www.youtube.com/watch?v=bgO40-CjYNY,LibreOffice Calc
6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5,https://www.youtube.com/watch?v=nl-nXjJurhQ,LibreOffice Calc
3aaa4e37-dc91-482e-99af-132a612d40f3,https://www.quora.com/How-can-you-import-export-CSV-files-with-LibreOffice-Calc-or-OpenOffice,LibreOffice Calc
0decd4f3-74de-457b-ba94-29ad6b5dafb6,https://justclickhere.co.uk/resources/checkboxes-tick-boxes-libreoffice-calc/,LibreOffice Calc
37608790-6147-45d0-9f20-1137bb35703d,https://www.youtube.com/shorts/uzPo_CPCHH8,MS Excel
f9584479-3d0d-4c79-affa-9ad7afdd8850,https://youtube.com/shorts/feldd-Pn48c?si=9xJiem2uAHm6Jshb,LibreOffice Calc
d681960f-7bc3-4286-9913-a8812ba3261a,https://www.youtube.com/shorts/d7U1S_IsTVM,LibreOffice Calc
f6a90742-3fa2-40fc-a564-f29b054e0337,https://www.excel-easy.com/examples/drop-down-list.html,LibreOffice Calc
21df9241-f8d7-4509-b7f1-37e501a823f7,https://www.youtube.com/watch?v=p5C4V_AO1UU,LibreOffice Calc
1334ca3e-f9e3-4db8-9ca7-b4c653be7d17,https://techcommunity.microsoft.com/t5/excel/excel-workbook-top-way-too-big-can-t-see-rows-and-columns/m-p/4014694,LibreOffice Calc
357ef137-7eeb-4c80-a3bb-0951f26a8aff,https://www.reddit.com/r/excel/comments/17zny8u/calculating_total_amount_earned_from_total_hours/,LibreOffice Calc
6f99a1ad-07d2-4b66-a1ce-ece6d99c20a5,https://techcommunity.microsoft.com/t5/excel/sumarize-the-sheetnames/m-p/4014716,LibreOffice Calc
aa3a8974-2e85-438b-b29e-a64df44deb4b,https://www.quora.com/Libre-Office-Calc-How-do-I-resize-all-cells-in-a-sheet-to-make-them-fit-to-1-page-for-printing-and-exporting-as-PDF,LibreOffice Calc
a01fbce3-2793-461f-ab86-43680ccbae25,https://superuser.com/questions/1250677/how-to-set-decimal-separator-in-libre-office-calc,LibreOffice Calc
4f07fbe9-70de-4927-a4d5-bb28bc12c52c,https://superuser.com/questions/1081048/libreoffice-calc-how-to-pad-number-to-fixed-decimals-when-used-within-formula,LibreOffice Calc
e3b1d5fa-ed00-4129-bda1-1452bd2b6772,https://www.reddit.com/r/libreoffice/comments/tel112/calc_how_to_calculate_sum_by_categories/,LibreOffice Calc
ca6a9524-f8e9-4d2f-9364-ab0cad567739,https://www.reddit.com/r/libreoffice/comments/113gmyc/how_to_remove_certain_text_from_cells_in_calc/,LibreOffice Calc
a455e8d0-930f-40d2-9575-5e8d2d222f58,https://superuser.com/questions/562944/quickly-fill-blank-cells-in-a-list-in-libreoffice-calc,LibreOffice Calc
83ee22c6-7737-49ce-9b5a-138c3e92af04,https://superuser.com/questions/661102/currency-conversion-in-libreoffice-calc,LibreOffice Calc
819f61c2-ec77-4d3f-9996-0838ae5aacc8,https://superuser.com/questions/381696/creating-a-column-of-working-days-in-libreoffice-calc,LibreOffice Calc
69d577b3-004e-4bca-89b2-0d7c2f6049e3,https://superuser.com/questions/387106/libreoffice-calc-how-to-get-total-for-hhmmss-cells,LibreOffice Calc
0a1bf4ca-d4ea-4618-baa5-6e8dc1b46d82,https://superuser.com/questions/571915/sum-up-to-n-highest-value-out-of-a-series,LibreOffice Calc
ac9bb6cb-1888-43ab-81e4-a98a547918cd,https://superuser.com/questions/1674211/how-to-change-colour-of-slide-number-in-libre-office,LibreOffice Impress
5d901039-a89c-4bfb-967b-bf66f4df075e,https://superuser.com/questions/986776/how-can-i-stretch-an-image-in-a-libreoffice-impress-presentation-to-fill-the-pag,LibreOffice Impress
071d4ace-091a-4ec3-886e-f4be55ae375d,https://superuser.com/questions/706860/hide-slide-numbers-and-slide-footer-on-first-and-second-slide-in-libreoffice-imp?rq=1,LibreOffice Impress
550ce7e7-747b-495f-b122-acdc4d0b8e54,"https://technical-tips.com/blog/software/text-in-libreoffice-strikethrough--6948#:~:text=To%20strikethrough%20Text%20in%20LibreOffice%201%20In%20your,effect%22%20can%20your%20additionally%2C%20for%20example%2C%20double%20underline.",LibreOffice Impress
455d3c66-7dc6-4537-a39a-36d3e9119df7,"https://www.libreofficehelp.com/export-libreoffice-impress-slides-images/#:~:text=Exporting%20a%20single%20slide%20as.jpg%2C.png%2C%20etc%20image%20is,on%20the%20checkbox%20Selection.%20Provide%20jpg%20quality%20options.",LibreOffice Impress
af23762e-2bfd-4a1d-aada-20fa8de9ce07,https://superuser.com/questions/1059080/how-to-make-a-summary-slide-in-impress-listing-the-titles-of-all-slides-autom,LibreOffice Impress
c59742c0-4323-4b9d-8a02-723c251deaa0,https://www.reddit.com/r/libreoffice/comments/17lcdrp/audio_not_supported_in_libreoffice_impress/,LibreOffice Impress
39478d4a-1049-456f-aa77-407811393add,https://www.reddit.com/r/libreoffice/comments/jul3o8/putting_cap_or_hat_or_carat_symbol_in_libre/,LibreOffice Impress
c3ad4442-499f-4e58-bc4e-1a1417ea9b8c,http://maharajacollege.ac.in/material/Libreofficeimpresspdf.pdf,LibreOffice Impress
ef9d12bd-bcee-4ba0-a40e-918400f43ddf,https://www.reddit.com/r/libreoffice/comments/18elh3y/i_closed_the_slide_pannel_on_the_left_and_idk_how/,LibreOffice Impress
9ec204e4-f0a3-42f8-8458-b772a6797cab,https://www.tiktok.com/@lil.d1rt_/video/7247574148887629083,LibreOffice Impress
0f84bef9-9790-432e-92b7-eece357603fb,https://stackoverflow.com/questions/29036788/how-to-disable-libreoffice-impress-to-use-multiple-display,LibreOffice Impress
ce88f674-ab7a-43da-9201-468d38539e4a,https://justclickhere.co.uk/resources/change-slides-in-impress-to-portrait/,LibreOffice Impress
f0a334af-f91b-4c03-b578-aac9bec2b543,https://www.libreofficehelp.com/insert-video-impress-presentation/#Inserting_a_Video_in_Impress,LibreOffice Impress
3b27600c-3668-4abd-8f84-7bcdebbccbdb,https://www.libreofficehelp.com/change-slide-background-impress/#All_Slides,LibreOffice Impress
a097acff-6266-4291-9fbd-137af7ecd439,https://www.youtube.com/watch?v=DDmEvjs4iBw,LibreOffice Impress
21760ecb-8f62-40d2-8d85-0cee5725cb72,https://www.libreofficehelp.com/add-animations-transitions-libreoffice-impress-slides/,LibreOffice Impress
3cc4f35d-fa2e-4555-afb9-741b7c062a74,https://documentation.libreoffice.org/assets/Uploads/Documentation/en/IG7.6/IG76-ImpressGuide.pdf,LibreOffice Impress
6ada715d-3aae-4a32-a6a7-429b2e43fb93,https://www.quora.com/How-do-you-insert-images-into-a-LibreOffice-Writer-document,LibreOffice Writer
ecc2413d-8a48-416e-a3a2-d30106ca36cb,https://www.quora.com/How-can-I-insert-a-blank-page-on-libreoffice,LibreOffice Writer
0e47de2a-32e0-456c-a366-8c607ef7a9d2,https://ask.libreoffice.org/t/how-to-start-page-numbering-on-a-certain-page/39931/4,LibreOffice Writer
4bcb1253-a636-4df4-8cb0-a35c04dfef31,https://www.libreofficehelp.com/save-export-writer-documents-in-pdf-epub-format/,LibreOffice Writer
0810415c-bde4-4443-9047-d5f70165a697,https://www.youtube.com/watch?v=Q_AaL6ljudU,LibreOffice Writer
e528b65e-1107-4b8c-8988-490e4fece599,https://www.youtube.com/watch?v=l25Evu4ohKg,LibreOffice Writer
66399b0d-8fda-4618-95c4-bfc6191617e9,https://www.youtube.com/watch?v=l25Evu4ohKg,LibreOffice Writer
936321ce-5236-426a-9a20-e0e3c5dc536f,https://www.youtube.com/watch?v=l25Evu4ohKg,LibreOffice Writer
663876c7-3471-43db-ba51-f410b13d9d7d,https://askubuntu.com/questions/319593/how-to-type-science-equations-in-libre-office,LibreOffice Writer
3ef2b351-8a84-4ff2-8724-d86eae9b842e,https://askubuntu.com/questions/1066351/how-do-you-center-align-in-libreoffice#:~:text=Ctrl%20%2B%20e%20will%20Center%20align%20the%20cursor%20for%20you.,LibreOffice Writer
45d61a06-6545-4422-97b7-bc76cfa964c1,https://stackoverflow.com/questions/71685737/how-to-replace-all-newlines-with-paragraph-marks-in-libreoffice-write,LibreOffice Writer
0b17a146-2934-46c7-8727-73ff6b6483e8,https://askubuntu.com/questions/245695/how-do-you-insert-subscripts-and-superscripts-into-ordinary-non-formula-text-i,LibreOffice Writer
0e763496-b6bb-4508-a427-fad0b6c3e195,https://ask.libreoffice.org/t/how-do-i-change-the-font-for-the-whole-document-in-writer/9220,LibreOffice Writer
f178a4a9-d090-4b56-bc4c-4b72a61a035d,https://ask.libreoffice.org/t/how-do-i-make-times-new-roman-the-default-font-in-lo/64604,LibreOffice Writer
0a0faba3-5580-44df-965d-f562a99b291c,https://stackoverflow.com/questions/64528055/how-to-make-part-of-my-sentence-left-aligned-and-rest-as-right-aligned,LibreOffice Writer
e246f6d8-78d7-44ac-b668-fcf47946cb50,https://ask.libreoffice.org/t/how-to-change-text-size-color-of-italic-font/77712,LibreOffice Writer
8472fece-c7dd-4241-8d65-9b3cd1a0b568,https://stackoverflow.com/questions/37259827/libreoffice-writer-how-to-set-different-colors-to-each-letter,LibreOffice Writer
88fe4b2d-3040-4c70-9a70-546a47764b48,https://stackoverflow.com/questions/56554555/libreoffice-writer-how-to-create-empty-line-space-after-every-period-in-a-par,LibreOffice Writer
6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2,https://superuser.com/questions/762500/how-do-i-find-all-highlighted-text-in-libreoffice-writer,LibreOffice Writer
d53ff5ee-3b1a-431e-b2be-30ed2673079b,https://ask.libreoffice.org/t/how-to-convert-all-uppercase-to-lowercase/53341,LibreOffice Writer
72b810ef-4156-4d09-8f08-a0cf57e7cefe,https://superuser.com/questions/657792/libreoffice-writer-how-to-apply-strikethrough-text-formatting?rq=1,LibreOffice Writer
6f81754e-285d-4ce0-b59e-af7edb02d108,https://superuser.com/questions/789473/remove-duplicate-lines-in-libreoffice-openoffice-writer,LibreOffice Writer
41c621f7-3544-49e1-af8d-dafd0f834f75,https://superuser.com/questions/1668018/how-to-auto-format-lines-in-libre-office-writer,LibreOffice Writer
b21acd93-60fd-4127-8a43-2f5178f4a830,https://superuser.com/questions/1097199/how-can-i-double-space-a-document-in-libreoffice?rq=1,LibreOffice Writer
59f21cfb-0120-4326-b255-a5b827b38967,https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/media.html#playing-a-file,VLC player
8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89,https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/recording/playing.html#choose-your-recordings-folder,VLC player
8f080098-ddb1-424c-b438-4e96e5e4786e,https://medium.com/@jetscribe_ai/how-to-extract-mp3-audio-from-videos-using-vlc-media-player-beeef644ebfb,VLC player
bba3381f-b5eb-4439-bd9e-80c22218d5a7,https://www.quora.com/How-do-I-play-online-videos-using-the-VLC-media-player,VLC player
a1c3ab35-02de-4999-a7ed-2fd12c972c6e,https://www.quora.com/How-do-I-compress-a-video-with-VLC,VLC player
fba2c100-79e8-42df-ae74-b592418d54f4,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s,VLC player
d70666e4-7348-42c7-a06a-664094c5df3c,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s,VLC player
efcf0d81-0835-4880-b2fd-d866e8bc2294,"https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s, https://help.ubuntu.com/stable/ubuntu-help/look-background.html.en",VLC player
8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f,https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s,VLC player
aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6,https://videoconverter.wondershare.com/vlc/how-to-rotate-a-video-using-vlc.html?gad_source=1&gclid=CjwKCAiA-vOsBhAAEiwAIWR0TaGSOLkYiBeVQGZSyfeUP3g-tIvYxffl5RFIu0-zrUL1IF41eCw1JRoCnCMQAvD_BwE,VLC player
386dbd0e-0241-4a0a-b6a2-6704fba26b1c,https://superuser.com/questions/1708415/pause-and-play-vlc-in-background?rq=1,VLC player
9195653c-f4aa-453d-aa95-787f6ccfaae9,https://superuser.com/questions/1513285/how-can-i-increase-the-maximum-volume-output-by-vlc?rq=1,VLC player
5ac2891a-eacd-4954-b339-98abba077adb,"https://superuser.com/questions/1412810/how-to-prevent-vlc-media-player-from-auto-closing-after-video-end#:%7E:text=Click%20on%20%22Media%22on%20the,VLC%20player%20after%20video%20ending",VLC player
0d95d28a-9587-433b-a805-1fbe5467d598,https://superuser.com/questions/1299036/vlc-how-to-open-the-folder-of-the-current-playing-video?noredirect=1&lq=1,VLC player
d06f0d4d-2cd5-4ede-8de9-598629438c6e,https://superuser.com/questions/1039392/changing-colour-of-vlc-volume-slider,VLC player
a5bbbcd5-b398-4c91-83d4-55e1e31bbb81,https://superuser.com/questions/776056/how-to-hide-bottom-toolbar-in-vlc,VLC player
f3977615-2b45-4ac5-8bba-80c17dbe2a37,https://www.reddit.com/r/Fedora/comments/rhljzd/how_to_run_multiple_instances_of_vlc_media_player/,VLC player
c669a35f-d45a-450e-b1f2-f473748337bb,https://www.quora.com/How-do-I-fast-forward-a-video-in-VLC-player,VLC player
d1ba14d0-fef8-4026-8418-5b581dc68ca0,https://superuser.com/questions/306154/how-to-use-a-b-repeat-feature-of-vlc,VLC player
215dfd39-f493-4bc3-a027-8a97d72c61bf,https://superuser.com/questions/1224784/how-to-change-vlcs-splash-screen,VLC player
bb5e4c0d-f964-439c-97b6-bdb9747de3f4,https://www.wikihow.com/Remove-an-Email-Account-from-Thunderbird,ThunderBird
7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3,https://www.wikihow.com/Access-Gmail-With-Mozilla-Thunderbird,ThunderBird
b188fe10-ae67-4db8-a154-26a0b8ff8f1e,https://www.reddit.com/r/Thunderbird/comments/17vv2os/restore_readability_in_message_list_pane/,ThunderBird
12086550-11c0-466b-b367-1d9e75b3910e,https://www.bitrecover.com/blog/manage-thunderbird-profiles/,ThunderBird
06fe7178-4491-4589-810f-2e2bc9502122,https://www.quora.com/How-do-I-backup-email-files-in-Mozilla-Thunderbird,ThunderBird
6766f2b8-8a72-417f-a9e5-56fcaa735837,"https://www.adsigner.com/user-manual/signatures/setup-email-client-thunderbird/#:~:text=is%20probably%20hidden.-,Right%20click%20on%20the%20empty%20space%20at%20the%20top%20of,signature%20from%20a%20file%20instead.",ThunderBird
e1e75309-3ddb-4d09-92ec-de869c928143,https://support.mozilla.org/en-US/kb/organize-your-messages-using-filters,ThunderBird
3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5,https://support.mozilla.org/en-US/kb/organize-your-messages-using-filters,ThunderBird
35253b65-1c19-4304-8aa4-6884b8218fc0,https://support.mozilla.org/en-US/questions/1259354,ThunderBird
d088f539-cab4-4f9a-ac92-9999fc3a656e,https://support.mozilla.org/en-US/kb/how-use-attachments,ThunderBird
2ad9387a-65d8-4e33-ad5b-7580065a27ca,"https://support.mozilla.org/bm/questions/1027435, https://www.wikihow.tech/Create-Folders-in-Mozilla-Thunderbird",ThunderBird
480bcfea-d68f-4aaa-a0a9-2589ef319381,https://www.reddit.com/r/Thunderbird/comments/182dg5p/unified_inbox_howto/,ThunderBird
37b9808f-b2b4-4177-ab00-9ddfae4bad27,https://www.quora.com/How-can-I-schedule-Mozilla-Thunderbird-to-turn-off-automatically,ThunderBird
af630914-714e-4a24-a7bb-f9af687d3b91,https://stackoverflow.com/questions/11333148/adding-a-toolbar-button-to-a-thundebird-compose-message-window?rq=3,ThunderBird
3299584d-8f11-4457-bf4c-ce98f7600250,https://superuser.com/questions/1643561/would-like-to-see-the-email-address-from-sender-in-the-column,ThunderBird
030eeff7-b492-4218-b312-701ec99ee0cc,https://superuser.com/questions/1781004/how-do-i-remove-the-indentation-and-character-in-quoted-text-of-a-reply-mess,ThunderBird
94760984-3ff5-41ee-8347-cf1af709fea0,https://superuser.com/questions/1757333/how-can-i-view-thunderbird-in-full-dark-mode,ThunderBird
99146c54-4f37-4ab8-9327-5f3291665e1e,https://superuser.com/questions/1764409/how-to-send-email-with-thunderbird-without-configuring-an-incoming-email-service,ThunderBird
9656a811-9b5b-4ddf-99c7-5117bcef0626,https://superuser.com/questions/205240/is-there-a-way-to-get-a-popup-confirmation-box-when-you-send-an-email-in-thunder?rq=1,ThunderBird
c9e7eaf2-b1a1-4efc-a982-721972fa9f02,https://superuser.com/questions/544480/how-to-apply-automatic-message-filters-to-subfolders-too?noredirect=1&lq=1,ThunderBird
bb5e4c0d-f964-439c-97b6-bdb9747de3f4,https://support.google.com/chrome/answer/95426?sjid=16867045591165135686-AP,Chrome
7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3,https://support.google.com/chrome/answer/95647?hl=en&ref_topic=7438325&sjid=16867045591165135686-AP#zippy=%2Cdelete-cookies-from-a-site,Chrome
12086550-11c0-466b-b367-1d9e75b3910e,https://www.quora.com/What-are-the-cool-tricks-to-use-Google-Chrome,Chrome
06fe7178-4491-4589-810f-2e2bc9502122,https://www.wikihow.com/Switch-Tabs-in-Chrome,Chrome
6766f2b8-8a72-417f-a9e5-56fcaa735837,https://support.google.com/chrome/thread/205881926/it-s-possible-to-load-unpacked-extension-automatically-in-chrome?hl=en,Chrome
e1e75309-3ddb-4d09-92ec-de869c928143,https://in5stepstutorials.com/google-chrome/save-web-page-as-pdf-in-chrome.php,Chrome
3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5,https://in5stepstutorials.com/google-chrome/add-change-delete-autofill-address.php,Chrome
35253b65-1c19-4304-8aa4-6884b8218fc0,"https://www.laptopmag.com/articles/how-to-create-desktop-shortcuts-for-web-pages-using-chrome, https://www.reddit.com/r/chrome/comments/13xcbap/crete_shortcut_option_missing/",Chrome
d088f539-cab4-4f9a-ac92-9999fc3a656e,https://medium.com/@inkverseuk2/useful-tips-and-tricks-for-the-google-chrome-browser-ac7d0d24b3cc,Chrome
2ad9387a-65d8-4e33-ad5b-7580065a27ca,https://www.youtube.com/watch?v=IN-Eq_UripQ,Chrome
7a5a7856-f1b6-42a4-ade9-1ca81ca0f263,https://www.youtube.com/watch?v=ZaZ8GcTxjXA,Chrome
3720f614-37fd-4d04-8a6b-76f54f8c222d,https://superuser.com/questions/984668/change-interface-language-of-chrome-to-english,Chrome
b63059a2-53bc-4163-a89f-3ac948c74081,https://superuser.com/questions/1303418/how-do-i-make-chrome-block-absolutely-all-pop-ups?rq=1,Chrome
44ee5668-ecd5-4366-a6ce-c1c9b8d4e938,https://superuser.com/questions/1787991/clear-browsing-history-from-specific-site-on-chrome,Chrome
b5ebc8c6-6329-4373-85b4-9421c97375e9,https://superuser.com/questions/364470/is-there-a-way-to-view-google-chrome-browsing-history-past-three-months-ago?rq=1,Chrome
93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9,https://superuser.com/questions/1417973/how-to-disable-google-chrome-dark-mode,Chrome
2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3,https://superuser.com/questions/1393683/how-to-change-the-username-in-google-chrome-profiles?rq=1,Chrome
480bcfea-d68f-4aaa-a0a9-2589ef319381,https://bugartisan.medium.com/disable-the-new-chrome-ui-round-in-2023-db168271f71e,Chrome
37b9808f-b2b4-4177-ab00-9ddfae4bad27,https://www.reddit.com/r/chrome/comments/17niw3h/tutorial_how_to_disable_the_download_bubble_in/,Chrome
af630914-714e-4a24-a7bb-f9af687d3b91,https://www.howtogeek.com/680260/how-to-change-chromes-default-text-size/,Chrome
ae78f875-5b98-4907-bbb5-9c737fc68c03,https://support.google.com/chrome/thread/219988391/increase-search-results-per-page?hl=en,Chrome
0ed39f63-6049-43d4-ba4d-5fa2fe04a951,https://www.quora.com/How-do-you-find-and-replace-text-in-Visual-Studio-Code,VS Code
b421106e-b282-4c41-af72-37c95493f95f,https://stackoverflow.com/questions/74153883/launch-vscode-with-new-txt-file,VS Code
53ad5833-3455-407b-bbc6-45b4c79ab8fb,https://www.youtube.com/watch?v=VqCgcpAypFQ,VS Code
eabc805a-bfcf-4460-b250-ac92135819f6,https://www.youtube.com/watch?v=VqCgcpAypFQ,VS Code
3486f395-ad68-459c-8c39-ea07de934dd4,https://www.youtube.com/watch?v=VqCgcpAypFQ,VS Code
982d12a5-beab-424f-8d38-d2a48429e511,https://www.youtube.com/watch?v=ORrELERGIHs,VS Code
4e60007a-f5be-4bfc-9723-c39affa0a6d3,"https://campbell-muscle-lab.github.io/howtos_Python/pages/documentation/best_practices/vscode_docstring_extension/vscode_docstring_extension.html#:~:text=Type%2C%20Ctrl%20%2B%20Shift%20%2B%20P,select%20the%20NumPy%20docstring%20format.",VS Code
e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2,https://superuser.com/questions/1386061/how-to-suppress-some-python-errors-warnings-in-vs-code,VS Code
9439a27b-18ae-42d8-9778-5f68f891805e,https://stackoverflow.com/questions/75832474/how-to-keep-cursor-in-debug-console-when-debugging-in-visual-studio-code,VS Code
ae506c68-352c-4094-9caa-ee9d42052317,https://superuser.com/questions/1460404/get-visual-studio-code-terminal-history?rq=1,VS Code
ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae,https://superuser.com/questions/1748097/vs-code-disable-tree-view-find-explorer-search,VS Code
c714dcee-cad3-4e12-8f3c-12bdcfcdb048,https://superuser.com/questions/1417361/how-to-disable-file-filtering-in-vs-code-sidebar-explorer?rq=1,VS Code
930fdb3b-11a8-46fe-9bac-577332e2640e,https://superuser.com/questions/1270103/how-to-switch-the-cursor-between-terminal-and-code-in-vscode,VS Code
276cc624-87ea-4f08-ab93-f770e3790175,https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code,VS Code
9d425400-e9b2-4424-9a4b-d4c7abac4140,https://superuser.com/questions/1466771/is-there-a-way-to-make-editor-tabs-stack-in-vs-code,VS Code
7a4deb26-d57d-4ea9-9a73-630f66a7b568,https://www.quora.com/How-do-I-edit-a-photo-in-GIMP,GIMP
554785e9-4523-4e7a-b8e1-8016f565f56a,https://www.quora.com/How-do-I-edit-a-photo-in-GIMP,GIMP
77b8ab4d-994f-43ac-8930-8ca087d7c4b4,https://superuser.com/questions/1636113/how-to-get-gimp-to-recognize-images-or-pictures-folder-as-the-default-folder-for,GIMP
f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce,https://superuser.com/questions/612338/how-do-i-select-and-move-an-object-in-gimp,GIMP
d52d6308-ec58-42b7-a2c9-de80e4837b2b,https://superuser.com/questions/1447106/how-to-get-rid-of-the-gimp-tool-options-box,GIMP
2a729ded-3296-423d-aec4-7dd55ed5fbb3,https://www.youtube.com/watch?v=lOzSiOIipSM,GIMP
b148e375-fe0b-4bec-90e7-38632b0d73c2,https://www.quora.com/How-do-I-add-layers-in-GIMP,GIMP
a746add2-cab0-4740-ac36-c3769d9bfb46,https://www.youtube.com/watch?v=_L_MMU22bAw,GIMP
7b7617bd-57cc-468e-9c91-40c4ec2bcb3d,https://www.youtube.com/watch?v=G_PjQAy0iiU,GIMP
d16c99dc-2a1e-46f2-b350-d97c86c85c15,https://stackoverflow.com/questions/75185543/use-gimp-to-resize-image-in-one-layer-only,GIMP
573f79b5-abfe-4507-b455-251d45fe6198,https://stackoverflow.com/questions/45196895/gimp-add-padding-to-multiple-images,GIMP
06ca5602-62ca-47f6-ad4f-da151cde54cc,https://stackoverflow.com/questions/74664666/how-to-export-palette-based-png-in-gimp,GIMP
fa9b1e10-4d2d-4a13-af76-7efa822b6a8b,https://stackoverflow.com/questions/24626608/how-to-combine-several-png-images-as-layers-in-a-single-xcf-image,GIMP
6b2b72ed-3a10-4849-876a-750f7cdf3886,https://stackoverflow.com/questions/21018007/resize-image-to-fit-canvas-gimp,GIMP
d0e42fd2-d290-46b3-b598-a6e2b7be9c85,https://stackoverflow.com/questions/56758689/stop-gimp-from-merging-layers-when-de-selecting,GIMP
e2dd0213-26db-4349-abe5-d5667bfd725c,https://superuser.com/questions/839650/how-to-move-an-inserted-text-box-in-gimp,GIMP
f723c744-e62c-4ae6-98d1-750d3cd7d79d,https://www.reddit.com/r/GIMP/comments/12e57w8/how_to_use_gimp_to_exaggerate_contrast/,GIMP
8d6b1c9c-1aab-47fe-9ba5-e84c838d0c57,https://www.quora.com/How-can-email-attachments-be-converted-into-a-word-document-using-Mozilla-Thunderbird,multiple
11e1e614-9696-4d94-88c9-8e556880d41a,https://ifttt.com/applets/L2A89geP-send-chrome-software-update-release-alerts-to-email,multiple
57956154-f0fe-486b-88b8-e7126da035a9,https://zapier.com/apps/email/integrations/google-sheets/547/get-email-notifications-for-new-rows-in-a-google-sheets-spreadsheet,multiple
ec14c524-b245-456d-abd6-ec12c746e9f8,https://zapier.com/apps/gmail/integrations/google-sheets/2618/save-new-gmail-emails-matching-certain-traits-to-a-google-spreadsheet,multiple
cbf5fbda-425e-4619-bcf2-0ea8d4c0bfa3,https://zapier.com/apps/google-sheets/integrations/google-slides/13919/refresh-charts-on-a-google-slides-presentation-when-rows-are-updated-on-google-sheets,multiple
a54284d0-7b93-4327-bfcc-3a421516dbdd,https://superuser.com/questions/655622/cannot-drag-images-from-thunderbird-to-word,multiple
58565672-7bfe-48ab-b828-db349231de6b,https://superuser.com/questions/1792660/open-link-from-other-application-does-not-open-the-url-in-firefox,multiple
6d72aad6-187a-4392-a4c4-ed87269c51cf,https://superuser.com/questions/923171/converting-openoffice-impress-presentation-to-video-without-screen-recording,multiple
937087b6-f668-4ba6-9110-60682ee33441,https://superuser.com/questions/187440/set-default-ubuntu-video-player-as-vlc,multiple
f8cfa149-d1c1-4215-8dac-4a0932bad3c2,https://superuser.com/questions/1803088/libreoffice-calc-clears-clipboard,multiple
5e974913-6905-4c3f-8b65-d7837f3931cc,https://stackoverflow.com/questions/61856141/how-can-i-start-thunderbird-and-minimize-the-window-on-startup-in-ubuntu,multiple
7c179dad-f1c7-4892-b53f-d1c4023d23c7,https://stackoverflow.com/questions/21155085/pasting-excel-tables-in-thunderbird-e-mail-client,multiple
4a68b2dd-70f2-4532-9bc1-d21878bd8cb2,https://stackoverflow.com/questions/65669955/thunderbird-how-to-send-a-mail-to-all-receivers-of-a-folder,multiple
c8457fde-b14b-4aba-b402-144842ea29e1,https://stackoverflow.com/questions/65788200/how-to-open-xlsx-files-in-ms-excel-from-vs-code,multiple
81c425f5-78f3-4771-afd6-3d2973825947,https://www.zyxware.com/articles/3770/how-to-transfer-data-in-libreoffice-calc-to-libreoffice-writer-in-table-format,multiple
bb83cab4-e5c7-42c7-a67b-e46068032b86,https://ask.libreoffice.org/t/save-impress-presentation-as-writer-document/5291/4,multiple
227d2f97-562b-4ccb-ae47-a5ec9e142fbb,https://discourse.gnome.org/t/gimp-and-libre-office-writer/15430/4,multiple
a6bbc08c-51e9-4ee4-9327-83d05075d960,https://forum.openoffice.org/en/forum/viewtopic.php?t=105055,multiple
964e6e03-ba31-466b-8c15-5a351a81f675,https://www.maketecheasier.com/mail-merge-thunderbird-calc/,multiple
2fe4b718-3bd7-46ec-bdce-b184f5653624,https://www.thewindowsclub.com/how-to-create-animated-gif-from-a-video-file-using-vlc-and-gimp,multiple
d02b9364-6bb0-4c7e-9dbd-4db62822bc26,https://stackoverflow.com/questions/38306910/simple-python-script-to-get-a-libreoffice-base-field-and-play-on-vlc,multiple
57fb469b-127a-46fa-8281-bbb3840efdf5,https://support.mozilla.org/en-US/questions/1150626,multiple
3680a5ee-6870-426a-a997-eba929a0d25c,https://unix.stackexchange.com/questions/510850/how-to-open-calc-from-terminal-and-insert-files,multiple
2d8c8a20-6f54-4c2e-ad56-61fbe7af6b78,https://www.quora.com/How-do-I-force-LibreOffice-Calc-to-recalculate-a-spreadsheet-from-the-command-line,multiple
ee9a3c83-f437-4879-8918-be5efbb9fac7,https://stackoverflow.com/questions/64589140/convert-ods-to-csv-using-command-line-when-libreoffice-instance-is-running,multiple
f7dfbef3-7697-431c-883a-db8583a4e4f9,https://www.thegeekdiary.com/libreoffice-command-examples-in-linux/,multiple
2b9493d7-49b8-493a-a71b-56cd1f4d6908,https://devicetests.com/kill-libreoffice-writer-command-line-ubuntu,multiple
51f5801c-18b3-4f25-b0c3-02f85507a078,https://github.com/danielrcollins1/ImpressExtractNotes,multiple
81de345e-5473-4cb6-a74d-b6abf3475a6a,https://stackoverflow.com/questions/45588952/how-can-i-compose-and-send-email-in-thunderbird-from-commandline,multiple
2c9fc0de-3ee7-45e1-a5df-c86206ad78b5,https://nikki-ricks.medium.com/how-to-use-git-add-commit-and-push-in-vs-code-and-command-line-35c0e8c47b62,multiple
510f64c8-9bcc-4be1-8d30-638705850618,https://www.geeksforgeeks.org/how-to-start-vs-code-from-the-terminal-command-line/,multiple
9ff484f7-5c09-4398-ae29-d5904e59e138,https://stackoverflow.com/questions/38606973/playing-opening-and-pausing-vlc-command-line-executed-from-python-scripts,multiple
d9b7c649-c975-4f53-88f5-940b29c47247,https://marketplace.uipath.com/listings/extract-the-first-1000-gmail-emails-from-the-current-month-in-a-new-google-sheets-report,multiple
be4ef0dc-0f70-4936-81d8-3cd2b04482f8,https://marketplace.uipath.com/listings/table-data-extraction-for-sales-opportunities-to-excel-workbook,multiple
78aed49a-a710-4321-a793-b611a7c5b56b,https://marketplace.uipath.com/listings/upload-email-attachments-from-gmail-to-google-drive,multiple
897e3b53-5d4d-444b-85cb-2cdc8a97d903,https://marketplace.uipath.com/listings/convert-word-file-to-pdf-and-store-in-onedrive,multiple
4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc,https://marketplace.uipath.com/listings/extract-data-from-a-new-invoice-file-in-google-drive-and-store-it-in-google-sheets4473,multiple
b52b40a5-ad70-4c53-b5b0-5650a8387052,https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive,multiple
46407397-a7d5-4c6b-92c6-dbe038b1457b,https://marketplace.uipath.com/listings/upload-to-google-drive-images-from-pdf-attachments-received-via-gmail,multiple
a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb,https://marketplace.uipath.com/listings/backup-important-emails-to-onedrive-or-sharepoint,multiple
665f4af1-617d-4009-baff-84ff66071e6a,https://www.howtogeek.com/663927/how-to-open-google-chrome-using-command-prompt-on-windows-10/#open-chrome-straight-to-a-specific-website,multiple
e6313b30-3903-4ed9-8c7d-4c47bf51fc96,https://stackoverflow.com/questions/12258086/how-do-i-run-google-chrome-as-root,multiple
1 id Source InvolvedApp
2 94d95f96-9699-4208-98ba-3c3119edf9c2 https://help.ubuntu.com/lts/ubuntu-help/addremove-install.html.en OS
3 bedcedc4-4d72-425e-ad62-21960b11fe0d https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s OS
4 43c2d64c-bab5-4dcb-a30c-b888321c319a https://ubuntu.com/tutorials/command-line-for-beginners#4-creating-folders-and-files OS
5 7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82 https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files OS
6 ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3 https://www.youtube.com/watch?v=D4WyNjt_hbQ&t=2s OS
7 a462a795-fdc7-4b23-b689-e8b6df786b78 https://help.ubuntu.com/lts/ubuntu-help/shell-exit.html.en OS
8 f9be0997-4b7c-45c5-b05c-4612b44a6118 https://help.ubuntu.com/lts/ubuntu-help/shell-notifications.html.en OS
9 ae039631-2b12-4637-84f6-c67d51511be3 https://help.ubuntu.com/lts/ubuntu-help/net-default-browser.html.en OS
10 e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15 https://help.ubuntu.com/lts/ubuntu-help/contacts-add-remove.html.en OS
11 28cc3b7e-b194-4bc9-8353-d04c0f4d56d2 https://help.ubuntu.com/lts/ubuntu-help/sound-volume.html.en OS
12 5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57 https://help.ubuntu.com/lts/ubuntu-help/files-recover.html.en OS
13 e0df059f-28a6-4169-924f-b9623e7184cc https://help.ubuntu.com/lts/ubuntu-help/files-rename.html.en OS
14 ddc75b62-7311-4af8-bfb3-859558542b36 https://help.ubuntu.com/lts/ubuntu-help/addremove-remove.html.en OS
15 5c433d22-ed9a-4e31-91f5-54cf3e8acd63 https://help.ubuntu.com/lts/ubuntu-help/session-language.html.zh-CN OS
16 b6781586-6346-41cd-935a-a6b1487918fc https://help.ubuntu.com/lts/ubuntu-help/clock-timezone.html.en OS
17 b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa https://help.ubuntu.com/lts/ubuntu-help/bluetooth-turn-on-off.html.en OS
18 3ce045a0-877b-42aa-8d2c-b4a863336ab8 https://help.ubuntu.com/lts/ubuntu-help/a11y-font-size.html.en OS
19 fe41f596-a71b-4c2f-9b2f-9dcd40b568c3 https://help.ubuntu.com/lts/ubuntu-help/power-percentage.html.en OS
20 a4d98375-215b-4a4d-aee9-3d4370fccc41 https://help.ubuntu.com/lts/ubuntu-help/privacy-screen-lock.html.en OS
21 765d2b74-88a7-4d50-bf51-34e4106fd24a https://help.ubuntu.com/lts/ubuntu-help/files-delete.html.en OS
22 cc9d4f34-1ca0-4a1b-8ff2-09302696acb9 https://superuser.com/questions/178587/how-do-i-detach-a-process-from-terminal-entirely OS
23 5812b315-e7bd-4265-b51f-863c02174c28 https://superuser.com/questions/149404/create-an-ssh-user-who-only-has-permission-to-access-specific-folders OS
24 c56de254-a3ec-414e-81a6-83d2ce8c41fa https://superuser.com/questions/28426/how-to-extract-text-with-ocr-from-a-pdf-on-linux OS
25 6ebbfb01-ea72-4226-a2a6-dc428e111ed2 https://superuser.com/questions/46748/how-do-i-make-bash-my-default-shell-on-ubuntu OS
26 4d2b519e-e872-4100-8ea3-fe71ab0f9133 https://stackoverflow.com/questions/11530090/adding-a-new-entry-to-the-path-variable-in-zsh OS
27 c288e301-e626-4b98-a1ab-159dcb162af5 https://stackoverflow.com/questions/41986507/unable-to-set-default-python-version-to-python3-in-ubuntu OS
28 13584542-872b-42d8-b299-866967b5c3ef https://superuser.com/questions/72176/linux-set-default-terminal-size-and-screen-position OS
29 23393935-50c7-4a86-aeea-2b78fd089c5c https://superuser.com/questions/91307/copying-only-jpg-from-a-directory-structure-to-another-location-linux OS
30 f10b16e1-c160-4cb3-989f-7b2ec89bc073 https://www.wikihow.com/Install-Gnome-on-Ubuntu OS
31 eb03d19a-b88d-4de4-8a64-ca0ac66f426b https://www.youtube.com/shorts/t9JLUaT55UQ MS Excel
32 0bf05a7d-b28b-44d2-955a-50b41e24012a https://www.youtube.com/shorts/FPAQaDTS8VY MS Excel
33 7b802dad-6e0f-4204-9815-d4e3f57627d8 https://www.youtube.com/shorts/Of-lzeP1usE MS Excel
34 7a4e4bc8-922c-4c84-865c-25ba34136be1 https://www.youtube.com/shorts/bvUhr1AHs44 MS Excel
35 2bd59342-0664-4ccb-ba87-79379096cc08 https://www.youtube.com/shorts/L3Z-F1QTQFY MS Excel
36 a9f325aa-8c05-4e4f-8341-9e4358565f4f https://www.youtube.com/shorts/A0gmEBRKXWs MS Excel
37 ecb0df7a-4e8d-4a03-b162-053391d3afaf https://www.youtube.com/shorts/tXOovKn0H68 MS Excel
38 7efeb4b1-3d19-4762-b163-63328d66303b https://www.youtube.com/shorts/4jzXfZNhfmk MS Excel
39 4e6fcf72-daf3-439f-a232-c434ce416af6 https://www.youtube.com/shorts/0uxJccNCKcE MS Excel
40 6054afcb-5bab-4702-90a0-b259b5d3217c https://www.youtube.com/shorts/JTbZ8sRxkdU MS Excel
41 abed40dc-063f-4598-8ba5-9fe749c0615d https://www.youtube.com/shorts/xgf4ZpsEx5M MS Excel
42 01b269ae-2111-4a07-81fd-3fcd711993b0 https://www.youtube.com/shorts/VrUzPTIwQ04 MS Excel
43 8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14 https://www.youtube.com/shorts/Hbcwu6IQ1ns MS Excel
44 af2b02f7-acee-4be4-8b66-499fab394915 https://www.youtube.com/shorts/AwKsb5VmtBI MS Excel
45 da1d63b8-fa12-417b-ba18-f748e5f770f3 https://www.youtube.com/shorts/hquscnbz2-U MS Excel
46 636380ea-d5f6-4474-b6ca-b2ed578a20f1 https://www.youtube.com/shorts/_BYL6VOHLGw MS Excel, Edge
47 5ba77536-05c5-4aae-a9ff-6e298d094c3e https://www.youtube.com/shorts/CuBC1evUS5I MS Excel
48 4bc4eaf4-ca5e-4db2-8138-8d4e65af7c0b https://www.youtube.com/shorts/1adQWfjN-tI MS Excel
49 672a1b02-c62f-4ae2-acf0-37f5fb3052b0 https://www.youtube.com/shorts/2rhdQXI4Lng MS Excel
50 648fe544-16ba-44af-a587-12ccbe280ea6 https://www.youtube.com/shorts/sOPBMWaC6Uc MS Excel
51 8985d1e4-5b99-4711-add4-88949ebb2308 https://www.youtube.com/shorts/J5ts2Acv9Pc MS Excel
52 9e606842-2e27-43bf-b1d1-b43289c9589b https://www.youtube.com/shorts/B-mGYDFOyUs MS Excel
53 fcb6e45b-25c4-4087-9483-03d714f473a9 https://www.youtube.com/shorts/GZipp7nOZS0 MS Excel
54 68c0c5b7-96f3-4e87-92a7-6c1b967fd2d2 https://www.youtube.com/shorts/JEH5TsK-cCk MS Excel, Edge
55 fff629ea-046e-4793-8eec-1a5a15c3eb35 https://www.youtube.com/shorts/8WybtCdUT6w MS Excel
56 5c9a206c-bb00-4fb6-bb46-ee675c187df5 https://www.youtube.com/shorts/VbQtMNnq9i4 MS Excel
57 e975ae74-79bd-4672-8d1c-dc841a85781d https://www.youtube.com/shorts/GjT7gGe5Sr8 MS Excel
58 34a6938a-58da-4897-8639-9b90d6db5391 https://www.youtube.com/shorts/gW37x2TkzOY MS Excel
59 b5a22759-b4eb-4bf2-aeed-ad14e8615f19 https://www.youtube.com/shorts/3xLa-D0C7Ic MS Excel
60 2f9913a1-51ed-4db6-bfe0-7e1c95b3139e https://www.youtube.com/shorts/dGLRcmfVO6Q MS Excel
61 2558031e-401d-4579-8e00-3ecf540fb492 https://www.mrexcel.com/board/threads/sales-for-the-first-6-weeks.1249213/ MS Excel
62 39aa4e37-dc91-482e-99af-132a612d40f3 https://www.libreofficehelp.com/add-insert-delete-copy-move-rename-a-worksheet-in-libreoffice-calc/ LibreOffice Calc
63 0cecd4f3-74de-457b-ba94-29ad6b5dafb6 https://www.libreofficehelp.com/add-insert-delete-copy-move-rename-a-worksheet-in-libreoffice-calc/ LibreOffice Calc
64 4188d3a4-077d-46b7-9c86-23e1a036f6c1 https://www.libreofficehelp.com/freeze-unfreeze-rows-columns-ranges-calc/ LibreOffice Calc
65 51b11269-2ca8-4b2a-9163-f21758420e78 https://www.reddit.com/r/LibreOfficeCalc/comments/186pcc6/how_to_arrange_numbers_in_a_column_from_minimum/ LibreOffice Calc
66 7e429b8d-a3f0-4ed0-9b58-08957d00b127 https://medium.com/@divyangichaudhari17/how-to-use-vlookup-and-hlookup-in-libre-calc-3370698bb3ff LibreOffice Calc
67 f5a90742-3fa2-40fc-a564-f29b054e0337 https://superuser.com/questions/1236149/libreoffice-calc-how-to-apply-functions-to-columns LibreOffice Calc
68 22df9241-f8d7-4509-b7f1-37e501a823f7 https://superuser.com/questions/1767185/how-do-you-move-cells-in-libreoffice-calc LibreOffice Calc
69 1434ca3e-f9e3-4db8-9ca7-b4c653be7d17 https://www.wikihow.com/Remove-Duplicates-in-Open-Office-Calc LibreOffice Calc
70 347ef137-7eeb-4c80-a3bb-0951f26a8aff https://www.youtube.com/watch?v=bgO40-CjYNY LibreOffice Calc
71 6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5 https://www.youtube.com/watch?v=nl-nXjJurhQ LibreOffice Calc
72 3aaa4e37-dc91-482e-99af-132a612d40f3 https://www.quora.com/How-can-you-import-export-CSV-files-with-LibreOffice-Calc-or-OpenOffice LibreOffice Calc
73 0decd4f3-74de-457b-ba94-29ad6b5dafb6 https://justclickhere.co.uk/resources/checkboxes-tick-boxes-libreoffice-calc/ LibreOffice Calc
74 37608790-6147-45d0-9f20-1137bb35703d https://www.youtube.com/shorts/uzPo_CPCHH8 MS Excel
75 f9584479-3d0d-4c79-affa-9ad7afdd8850 https://youtube.com/shorts/feldd-Pn48c?si=9xJiem2uAHm6Jshb LibreOffice Calc
76 d681960f-7bc3-4286-9913-a8812ba3261a https://www.youtube.com/shorts/d7U1S_IsTVM LibreOffice Calc
77 f6a90742-3fa2-40fc-a564-f29b054e0337 https://www.excel-easy.com/examples/drop-down-list.html LibreOffice Calc
78 21df9241-f8d7-4509-b7f1-37e501a823f7 https://www.youtube.com/watch?v=p5C4V_AO1UU LibreOffice Calc
79 1334ca3e-f9e3-4db8-9ca7-b4c653be7d17 https://techcommunity.microsoft.com/t5/excel/excel-workbook-top-way-too-big-can-t-see-rows-and-columns/m-p/4014694 LibreOffice Calc
80 357ef137-7eeb-4c80-a3bb-0951f26a8aff https://www.reddit.com/r/excel/comments/17zny8u/calculating_total_amount_earned_from_total_hours/ LibreOffice Calc
81 6f99a1ad-07d2-4b66-a1ce-ece6d99c20a5 https://techcommunity.microsoft.com/t5/excel/sumarize-the-sheetnames/m-p/4014716 LibreOffice Calc
82 aa3a8974-2e85-438b-b29e-a64df44deb4b https://www.quora.com/Libre-Office-Calc-How-do-I-resize-all-cells-in-a-sheet-to-make-them-fit-to-1-page-for-printing-and-exporting-as-PDF LibreOffice Calc
83 a01fbce3-2793-461f-ab86-43680ccbae25 https://superuser.com/questions/1250677/how-to-set-decimal-separator-in-libre-office-calc LibreOffice Calc
84 4f07fbe9-70de-4927-a4d5-bb28bc12c52c https://superuser.com/questions/1081048/libreoffice-calc-how-to-pad-number-to-fixed-decimals-when-used-within-formula LibreOffice Calc
85 e3b1d5fa-ed00-4129-bda1-1452bd2b6772 https://www.reddit.com/r/libreoffice/comments/tel112/calc_how_to_calculate_sum_by_categories/ LibreOffice Calc
86 ca6a9524-f8e9-4d2f-9364-ab0cad567739 https://www.reddit.com/r/libreoffice/comments/113gmyc/how_to_remove_certain_text_from_cells_in_calc/ LibreOffice Calc
87 a455e8d0-930f-40d2-9575-5e8d2d222f58 https://superuser.com/questions/562944/quickly-fill-blank-cells-in-a-list-in-libreoffice-calc LibreOffice Calc
88 83ee22c6-7737-49ce-9b5a-138c3e92af04 https://superuser.com/questions/661102/currency-conversion-in-libreoffice-calc LibreOffice Calc
89 819f61c2-ec77-4d3f-9996-0838ae5aacc8 https://superuser.com/questions/381696/creating-a-column-of-working-days-in-libreoffice-calc LibreOffice Calc
90 69d577b3-004e-4bca-89b2-0d7c2f6049e3 https://superuser.com/questions/387106/libreoffice-calc-how-to-get-total-for-hhmmss-cells LibreOffice Calc
91 0a1bf4ca-d4ea-4618-baa5-6e8dc1b46d82 https://superuser.com/questions/571915/sum-up-to-n-highest-value-out-of-a-series LibreOffice Calc
92 ac9bb6cb-1888-43ab-81e4-a98a547918cd https://superuser.com/questions/1674211/how-to-change-colour-of-slide-number-in-libre-office LibreOffice Impress
93 5d901039-a89c-4bfb-967b-bf66f4df075e https://superuser.com/questions/986776/how-can-i-stretch-an-image-in-a-libreoffice-impress-presentation-to-fill-the-pag LibreOffice Impress
94 071d4ace-091a-4ec3-886e-f4be55ae375d https://superuser.com/questions/706860/hide-slide-numbers-and-slide-footer-on-first-and-second-slide-in-libreoffice-imp?rq=1 LibreOffice Impress
95 550ce7e7-747b-495f-b122-acdc4d0b8e54 https://technical-tips.com/blog/software/text-in-libreoffice-strikethrough--6948#:~:text=To%20strikethrough%20Text%20in%20LibreOffice%201%20In%20your,effect%22%20can%20your%20additionally%2C%20for%20example%2C%20double%20underline. LibreOffice Impress
96 455d3c66-7dc6-4537-a39a-36d3e9119df7 https://www.libreofficehelp.com/export-libreoffice-impress-slides-images/#:~:text=Exporting%20a%20single%20slide%20as.jpg%2C.png%2C%20etc%20image%20is,on%20the%20checkbox%20Selection.%20Provide%20jpg%20quality%20options. LibreOffice Impress
97 af23762e-2bfd-4a1d-aada-20fa8de9ce07 https://superuser.com/questions/1059080/how-to-make-a-summary-slide-in-impress-listing-the-titles-of-all-slides-autom LibreOffice Impress
98 c59742c0-4323-4b9d-8a02-723c251deaa0 https://www.reddit.com/r/libreoffice/comments/17lcdrp/audio_not_supported_in_libreoffice_impress/ LibreOffice Impress
99 39478d4a-1049-456f-aa77-407811393add https://www.reddit.com/r/libreoffice/comments/jul3o8/putting_cap_or_hat_or_carat_symbol_in_libre/ LibreOffice Impress
100 c3ad4442-499f-4e58-bc4e-1a1417ea9b8c http://maharajacollege.ac.in/material/Libreofficeimpresspdf.pdf LibreOffice Impress
101 ef9d12bd-bcee-4ba0-a40e-918400f43ddf https://www.reddit.com/r/libreoffice/comments/18elh3y/i_closed_the_slide_pannel_on_the_left_and_idk_how/ LibreOffice Impress
102 9ec204e4-f0a3-42f8-8458-b772a6797cab https://www.tiktok.com/@lil.d1rt_/video/7247574148887629083 LibreOffice Impress
103 0f84bef9-9790-432e-92b7-eece357603fb https://stackoverflow.com/questions/29036788/how-to-disable-libreoffice-impress-to-use-multiple-display LibreOffice Impress
104 ce88f674-ab7a-43da-9201-468d38539e4a https://justclickhere.co.uk/resources/change-slides-in-impress-to-portrait/ LibreOffice Impress
105 f0a334af-f91b-4c03-b578-aac9bec2b543 https://www.libreofficehelp.com/insert-video-impress-presentation/#Inserting_a_Video_in_Impress LibreOffice Impress
106 3b27600c-3668-4abd-8f84-7bcdebbccbdb https://www.libreofficehelp.com/change-slide-background-impress/#All_Slides LibreOffice Impress
107 a097acff-6266-4291-9fbd-137af7ecd439 https://www.youtube.com/watch?v=DDmEvjs4iBw LibreOffice Impress
108 21760ecb-8f62-40d2-8d85-0cee5725cb72 https://www.libreofficehelp.com/add-animations-transitions-libreoffice-impress-slides/ LibreOffice Impress
109 3cc4f35d-fa2e-4555-afb9-741b7c062a74 https://documentation.libreoffice.org/assets/Uploads/Documentation/en/IG7.6/IG76-ImpressGuide.pdf LibreOffice Impress
110 6ada715d-3aae-4a32-a6a7-429b2e43fb93 https://www.quora.com/How-do-you-insert-images-into-a-LibreOffice-Writer-document LibreOffice Writer
111 ecc2413d-8a48-416e-a3a2-d30106ca36cb https://www.quora.com/How-can-I-insert-a-blank-page-on-libreoffice LibreOffice Writer
112 0e47de2a-32e0-456c-a366-8c607ef7a9d2 https://ask.libreoffice.org/t/how-to-start-page-numbering-on-a-certain-page/39931/4 LibreOffice Writer
113 4bcb1253-a636-4df4-8cb0-a35c04dfef31 https://www.libreofficehelp.com/save-export-writer-documents-in-pdf-epub-format/ LibreOffice Writer
114 0810415c-bde4-4443-9047-d5f70165a697 https://www.youtube.com/watch?v=Q_AaL6ljudU LibreOffice Writer
115 e528b65e-1107-4b8c-8988-490e4fece599 https://www.youtube.com/watch?v=l25Evu4ohKg LibreOffice Writer
116 66399b0d-8fda-4618-95c4-bfc6191617e9 https://www.youtube.com/watch?v=l25Evu4ohKg LibreOffice Writer
117 936321ce-5236-426a-9a20-e0e3c5dc536f https://www.youtube.com/watch?v=l25Evu4ohKg LibreOffice Writer
118 663876c7-3471-43db-ba51-f410b13d9d7d https://askubuntu.com/questions/319593/how-to-type-science-equations-in-libre-office LibreOffice Writer
119 3ef2b351-8a84-4ff2-8724-d86eae9b842e https://askubuntu.com/questions/1066351/how-do-you-center-align-in-libreoffice#:~:text=Ctrl%20%2B%20e%20will%20Center%20align%20the%20cursor%20for%20you. LibreOffice Writer
120 45d61a06-6545-4422-97b7-bc76cfa964c1 https://stackoverflow.com/questions/71685737/how-to-replace-all-newlines-with-paragraph-marks-in-libreoffice-write LibreOffice Writer
121 0b17a146-2934-46c7-8727-73ff6b6483e8 https://askubuntu.com/questions/245695/how-do-you-insert-subscripts-and-superscripts-into-ordinary-non-formula-text-i LibreOffice Writer
122 0e763496-b6bb-4508-a427-fad0b6c3e195 https://ask.libreoffice.org/t/how-do-i-change-the-font-for-the-whole-document-in-writer/9220 LibreOffice Writer
123 f178a4a9-d090-4b56-bc4c-4b72a61a035d https://ask.libreoffice.org/t/how-do-i-make-times-new-roman-the-default-font-in-lo/64604 LibreOffice Writer
124 0a0faba3-5580-44df-965d-f562a99b291c https://stackoverflow.com/questions/64528055/how-to-make-part-of-my-sentence-left-aligned-and-rest-as-right-aligned LibreOffice Writer
125 e246f6d8-78d7-44ac-b668-fcf47946cb50 https://ask.libreoffice.org/t/how-to-change-text-size-color-of-italic-font/77712 LibreOffice Writer
126 8472fece-c7dd-4241-8d65-9b3cd1a0b568 https://stackoverflow.com/questions/37259827/libreoffice-writer-how-to-set-different-colors-to-each-letter LibreOffice Writer
127 88fe4b2d-3040-4c70-9a70-546a47764b48 https://stackoverflow.com/questions/56554555/libreoffice-writer-how-to-create-empty-line-space-after-every-period-in-a-par LibreOffice Writer
128 6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2 https://superuser.com/questions/762500/how-do-i-find-all-highlighted-text-in-libreoffice-writer LibreOffice Writer
129 d53ff5ee-3b1a-431e-b2be-30ed2673079b https://ask.libreoffice.org/t/how-to-convert-all-uppercase-to-lowercase/53341 LibreOffice Writer
130 72b810ef-4156-4d09-8f08-a0cf57e7cefe https://superuser.com/questions/657792/libreoffice-writer-how-to-apply-strikethrough-text-formatting?rq=1 LibreOffice Writer
131 6f81754e-285d-4ce0-b59e-af7edb02d108 https://superuser.com/questions/789473/remove-duplicate-lines-in-libreoffice-openoffice-writer LibreOffice Writer
132 41c621f7-3544-49e1-af8d-dafd0f834f75 https://superuser.com/questions/1668018/how-to-auto-format-lines-in-libre-office-writer LibreOffice Writer
133 b21acd93-60fd-4127-8a43-2f5178f4a830 https://superuser.com/questions/1097199/how-can-i-double-space-a-document-in-libreoffice?rq=1 LibreOffice Writer
134 59f21cfb-0120-4326-b255-a5b827b38967 https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/media.html#playing-a-file VLC player
135 8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89 https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/recording/playing.html#choose-your-recordings-folder VLC player
136 8f080098-ddb1-424c-b438-4e96e5e4786e https://medium.com/@jetscribe_ai/how-to-extract-mp3-audio-from-videos-using-vlc-media-player-beeef644ebfb VLC player
137 bba3381f-b5eb-4439-bd9e-80c22218d5a7 https://www.quora.com/How-do-I-play-online-videos-using-the-VLC-media-player VLC player
138 a1c3ab35-02de-4999-a7ed-2fd12c972c6e https://www.quora.com/How-do-I-compress-a-video-with-VLC VLC player
139 fba2c100-79e8-42df-ae74-b592418d54f4 https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s VLC player
140 d70666e4-7348-42c7-a06a-664094c5df3c https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s VLC player
141 efcf0d81-0835-4880-b2fd-d866e8bc2294 https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s, https://help.ubuntu.com/stable/ubuntu-help/look-background.html.en VLC player
142 8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s VLC player
143 aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6 https://videoconverter.wondershare.com/vlc/how-to-rotate-a-video-using-vlc.html?gad_source=1&gclid=CjwKCAiA-vOsBhAAEiwAIWR0TaGSOLkYiBeVQGZSyfeUP3g-tIvYxffl5RFIu0-zrUL1IF41eCw1JRoCnCMQAvD_BwE VLC player
144 386dbd0e-0241-4a0a-b6a2-6704fba26b1c https://superuser.com/questions/1708415/pause-and-play-vlc-in-background?rq=1 VLC player
145 9195653c-f4aa-453d-aa95-787f6ccfaae9 https://superuser.com/questions/1513285/how-can-i-increase-the-maximum-volume-output-by-vlc?rq=1 VLC player
146 5ac2891a-eacd-4954-b339-98abba077adb https://superuser.com/questions/1412810/how-to-prevent-vlc-media-player-from-auto-closing-after-video-end#:%7E:text=Click%20on%20%22Media%22on%20the,VLC%20player%20after%20video%20ending VLC player
147 0d95d28a-9587-433b-a805-1fbe5467d598 https://superuser.com/questions/1299036/vlc-how-to-open-the-folder-of-the-current-playing-video?noredirect=1&lq=1 VLC player
148 d06f0d4d-2cd5-4ede-8de9-598629438c6e https://superuser.com/questions/1039392/changing-colour-of-vlc-volume-slider VLC player
149 a5bbbcd5-b398-4c91-83d4-55e1e31bbb81 https://superuser.com/questions/776056/how-to-hide-bottom-toolbar-in-vlc VLC player
150 f3977615-2b45-4ac5-8bba-80c17dbe2a37 https://www.reddit.com/r/Fedora/comments/rhljzd/how_to_run_multiple_instances_of_vlc_media_player/ VLC player
151 c669a35f-d45a-450e-b1f2-f473748337bb https://www.quora.com/How-do-I-fast-forward-a-video-in-VLC-player VLC player
152 d1ba14d0-fef8-4026-8418-5b581dc68ca0 https://superuser.com/questions/306154/how-to-use-a-b-repeat-feature-of-vlc VLC player
153 215dfd39-f493-4bc3-a027-8a97d72c61bf https://superuser.com/questions/1224784/how-to-change-vlcs-splash-screen VLC player
154 bb5e4c0d-f964-439c-97b6-bdb9747de3f4 https://www.wikihow.com/Remove-an-Email-Account-from-Thunderbird ThunderBird
155 7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3 https://www.wikihow.com/Access-Gmail-With-Mozilla-Thunderbird ThunderBird
156 b188fe10-ae67-4db8-a154-26a0b8ff8f1e https://www.reddit.com/r/Thunderbird/comments/17vv2os/restore_readability_in_message_list_pane/ ThunderBird
157 12086550-11c0-466b-b367-1d9e75b3910e https://www.bitrecover.com/blog/manage-thunderbird-profiles/ ThunderBird
158 06fe7178-4491-4589-810f-2e2bc9502122 https://www.quora.com/How-do-I-backup-email-files-in-Mozilla-Thunderbird ThunderBird
159 6766f2b8-8a72-417f-a9e5-56fcaa735837 https://www.adsigner.com/user-manual/signatures/setup-email-client-thunderbird/#:~:text=is%20probably%20hidden.-,Right%20click%20on%20the%20empty%20space%20at%20the%20top%20of,signature%20from%20a%20file%20instead. ThunderBird
160 e1e75309-3ddb-4d09-92ec-de869c928143 https://support.mozilla.org/en-US/kb/organize-your-messages-using-filters ThunderBird
161 3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5 https://support.mozilla.org/en-US/kb/organize-your-messages-using-filters ThunderBird
162 35253b65-1c19-4304-8aa4-6884b8218fc0 https://support.mozilla.org/en-US/questions/1259354 ThunderBird
163 d088f539-cab4-4f9a-ac92-9999fc3a656e https://support.mozilla.org/en-US/kb/how-use-attachments ThunderBird
164 2ad9387a-65d8-4e33-ad5b-7580065a27ca https://support.mozilla.org/bm/questions/1027435, https://www.wikihow.tech/Create-Folders-in-Mozilla-Thunderbird ThunderBird
165 480bcfea-d68f-4aaa-a0a9-2589ef319381 https://www.reddit.com/r/Thunderbird/comments/182dg5p/unified_inbox_howto/ ThunderBird
166 37b9808f-b2b4-4177-ab00-9ddfae4bad27 https://www.quora.com/How-can-I-schedule-Mozilla-Thunderbird-to-turn-off-automatically ThunderBird
167 af630914-714e-4a24-a7bb-f9af687d3b91 https://stackoverflow.com/questions/11333148/adding-a-toolbar-button-to-a-thundebird-compose-message-window?rq=3 ThunderBird
168 3299584d-8f11-4457-bf4c-ce98f7600250 https://superuser.com/questions/1643561/would-like-to-see-the-email-address-from-sender-in-the-column ThunderBird
169 030eeff7-b492-4218-b312-701ec99ee0cc https://superuser.com/questions/1781004/how-do-i-remove-the-indentation-and-character-in-quoted-text-of-a-reply-mess ThunderBird
170 94760984-3ff5-41ee-8347-cf1af709fea0 https://superuser.com/questions/1757333/how-can-i-view-thunderbird-in-full-dark-mode ThunderBird
171 99146c54-4f37-4ab8-9327-5f3291665e1e https://superuser.com/questions/1764409/how-to-send-email-with-thunderbird-without-configuring-an-incoming-email-service ThunderBird
172 9656a811-9b5b-4ddf-99c7-5117bcef0626 https://superuser.com/questions/205240/is-there-a-way-to-get-a-popup-confirmation-box-when-you-send-an-email-in-thunder?rq=1 ThunderBird
173 c9e7eaf2-b1a1-4efc-a982-721972fa9f02 https://superuser.com/questions/544480/how-to-apply-automatic-message-filters-to-subfolders-too?noredirect=1&lq=1 ThunderBird
174 bb5e4c0d-f964-439c-97b6-bdb9747de3f4 https://support.google.com/chrome/answer/95426?sjid=16867045591165135686-AP Chrome
175 7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3 https://support.google.com/chrome/answer/95647?hl=en&ref_topic=7438325&sjid=16867045591165135686-AP#zippy=%2Cdelete-cookies-from-a-site Chrome
176 12086550-11c0-466b-b367-1d9e75b3910e https://www.quora.com/What-are-the-cool-tricks-to-use-Google-Chrome Chrome
177 06fe7178-4491-4589-810f-2e2bc9502122 https://www.wikihow.com/Switch-Tabs-in-Chrome Chrome
178 6766f2b8-8a72-417f-a9e5-56fcaa735837 https://support.google.com/chrome/thread/205881926/it-s-possible-to-load-unpacked-extension-automatically-in-chrome?hl=en Chrome
179 e1e75309-3ddb-4d09-92ec-de869c928143 https://in5stepstutorials.com/google-chrome/save-web-page-as-pdf-in-chrome.php Chrome
180 3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5 https://in5stepstutorials.com/google-chrome/add-change-delete-autofill-address.php Chrome
181 35253b65-1c19-4304-8aa4-6884b8218fc0 https://www.laptopmag.com/articles/how-to-create-desktop-shortcuts-for-web-pages-using-chrome, https://www.reddit.com/r/chrome/comments/13xcbap/crete_shortcut_option_missing/ Chrome
182 d088f539-cab4-4f9a-ac92-9999fc3a656e https://medium.com/@inkverseuk2/useful-tips-and-tricks-for-the-google-chrome-browser-ac7d0d24b3cc Chrome
183 2ad9387a-65d8-4e33-ad5b-7580065a27ca https://www.youtube.com/watch?v=IN-Eq_UripQ Chrome
184 7a5a7856-f1b6-42a4-ade9-1ca81ca0f263 https://www.youtube.com/watch?v=ZaZ8GcTxjXA Chrome
185 3720f614-37fd-4d04-8a6b-76f54f8c222d https://superuser.com/questions/984668/change-interface-language-of-chrome-to-english Chrome
186 b63059a2-53bc-4163-a89f-3ac948c74081 https://superuser.com/questions/1303418/how-do-i-make-chrome-block-absolutely-all-pop-ups?rq=1 Chrome
187 44ee5668-ecd5-4366-a6ce-c1c9b8d4e938 https://superuser.com/questions/1787991/clear-browsing-history-from-specific-site-on-chrome Chrome
188 b5ebc8c6-6329-4373-85b4-9421c97375e9 https://superuser.com/questions/364470/is-there-a-way-to-view-google-chrome-browsing-history-past-three-months-ago?rq=1 Chrome
189 93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9 https://superuser.com/questions/1417973/how-to-disable-google-chrome-dark-mode Chrome
190 2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3 https://superuser.com/questions/1393683/how-to-change-the-username-in-google-chrome-profiles?rq=1 Chrome
191 480bcfea-d68f-4aaa-a0a9-2589ef319381 https://bugartisan.medium.com/disable-the-new-chrome-ui-round-in-2023-db168271f71e Chrome
192 37b9808f-b2b4-4177-ab00-9ddfae4bad27 https://www.reddit.com/r/chrome/comments/17niw3h/tutorial_how_to_disable_the_download_bubble_in/ Chrome
193 af630914-714e-4a24-a7bb-f9af687d3b91 https://www.howtogeek.com/680260/how-to-change-chromes-default-text-size/ Chrome
194 ae78f875-5b98-4907-bbb5-9c737fc68c03 https://support.google.com/chrome/thread/219988391/increase-search-results-per-page?hl=en Chrome
195 0ed39f63-6049-43d4-ba4d-5fa2fe04a951 https://www.quora.com/How-do-you-find-and-replace-text-in-Visual-Studio-Code VS Code
196 b421106e-b282-4c41-af72-37c95493f95f https://stackoverflow.com/questions/74153883/launch-vscode-with-new-txt-file VS Code
197 53ad5833-3455-407b-bbc6-45b4c79ab8fb https://www.youtube.com/watch?v=VqCgcpAypFQ VS Code
198 eabc805a-bfcf-4460-b250-ac92135819f6 https://www.youtube.com/watch?v=VqCgcpAypFQ VS Code
199 3486f395-ad68-459c-8c39-ea07de934dd4 https://www.youtube.com/watch?v=VqCgcpAypFQ VS Code
200 982d12a5-beab-424f-8d38-d2a48429e511 https://www.youtube.com/watch?v=ORrELERGIHs VS Code
201 4e60007a-f5be-4bfc-9723-c39affa0a6d3 https://campbell-muscle-lab.github.io/howtos_Python/pages/documentation/best_practices/vscode_docstring_extension/vscode_docstring_extension.html#:~:text=Type%2C%20Ctrl%20%2B%20Shift%20%2B%20P,select%20the%20NumPy%20docstring%20format. VS Code
202 e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2 https://superuser.com/questions/1386061/how-to-suppress-some-python-errors-warnings-in-vs-code VS Code
203 9439a27b-18ae-42d8-9778-5f68f891805e https://stackoverflow.com/questions/75832474/how-to-keep-cursor-in-debug-console-when-debugging-in-visual-studio-code VS Code
204 ae506c68-352c-4094-9caa-ee9d42052317 https://superuser.com/questions/1460404/get-visual-studio-code-terminal-history?rq=1 VS Code
205 ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae https://superuser.com/questions/1748097/vs-code-disable-tree-view-find-explorer-search VS Code
206 c714dcee-cad3-4e12-8f3c-12bdcfcdb048 https://superuser.com/questions/1417361/how-to-disable-file-filtering-in-vs-code-sidebar-explorer?rq=1 VS Code
207 930fdb3b-11a8-46fe-9bac-577332e2640e https://superuser.com/questions/1270103/how-to-switch-the-cursor-between-terminal-and-code-in-vscode VS Code
208 276cc624-87ea-4f08-ab93-f770e3790175 https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code VS Code
209 9d425400-e9b2-4424-9a4b-d4c7abac4140 https://superuser.com/questions/1466771/is-there-a-way-to-make-editor-tabs-stack-in-vs-code VS Code
210 7a4deb26-d57d-4ea9-9a73-630f66a7b568 https://www.quora.com/How-do-I-edit-a-photo-in-GIMP GIMP
211 554785e9-4523-4e7a-b8e1-8016f565f56a https://www.quora.com/How-do-I-edit-a-photo-in-GIMP GIMP
212 77b8ab4d-994f-43ac-8930-8ca087d7c4b4 https://superuser.com/questions/1636113/how-to-get-gimp-to-recognize-images-or-pictures-folder-as-the-default-folder-for GIMP
213 f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce https://superuser.com/questions/612338/how-do-i-select-and-move-an-object-in-gimp GIMP
214 d52d6308-ec58-42b7-a2c9-de80e4837b2b https://superuser.com/questions/1447106/how-to-get-rid-of-the-gimp-tool-options-box GIMP
215 2a729ded-3296-423d-aec4-7dd55ed5fbb3 https://www.youtube.com/watch?v=lOzSiOIipSM GIMP
216 b148e375-fe0b-4bec-90e7-38632b0d73c2 https://www.quora.com/How-do-I-add-layers-in-GIMP GIMP
217 a746add2-cab0-4740-ac36-c3769d9bfb46 https://www.youtube.com/watch?v=_L_MMU22bAw GIMP
218 7b7617bd-57cc-468e-9c91-40c4ec2bcb3d https://www.youtube.com/watch?v=G_PjQAy0iiU GIMP
219 d16c99dc-2a1e-46f2-b350-d97c86c85c15 https://stackoverflow.com/questions/75185543/use-gimp-to-resize-image-in-one-layer-only GIMP
220 573f79b5-abfe-4507-b455-251d45fe6198 https://stackoverflow.com/questions/45196895/gimp-add-padding-to-multiple-images GIMP
221 06ca5602-62ca-47f6-ad4f-da151cde54cc https://stackoverflow.com/questions/74664666/how-to-export-palette-based-png-in-gimp GIMP
222 fa9b1e10-4d2d-4a13-af76-7efa822b6a8b https://stackoverflow.com/questions/24626608/how-to-combine-several-png-images-as-layers-in-a-single-xcf-image GIMP
223 6b2b72ed-3a10-4849-876a-750f7cdf3886 https://stackoverflow.com/questions/21018007/resize-image-to-fit-canvas-gimp GIMP
224 d0e42fd2-d290-46b3-b598-a6e2b7be9c85 https://stackoverflow.com/questions/56758689/stop-gimp-from-merging-layers-when-de-selecting GIMP
225 e2dd0213-26db-4349-abe5-d5667bfd725c https://superuser.com/questions/839650/how-to-move-an-inserted-text-box-in-gimp GIMP
226 f723c744-e62c-4ae6-98d1-750d3cd7d79d https://www.reddit.com/r/GIMP/comments/12e57w8/how_to_use_gimp_to_exaggerate_contrast/ GIMP
227 8d6b1c9c-1aab-47fe-9ba5-e84c838d0c57 https://www.quora.com/How-can-email-attachments-be-converted-into-a-word-document-using-Mozilla-Thunderbird multiple
228 11e1e614-9696-4d94-88c9-8e556880d41a https://ifttt.com/applets/L2A89geP-send-chrome-software-update-release-alerts-to-email multiple
229 57956154-f0fe-486b-88b8-e7126da035a9 https://zapier.com/apps/email/integrations/google-sheets/547/get-email-notifications-for-new-rows-in-a-google-sheets-spreadsheet multiple
230 ec14c524-b245-456d-abd6-ec12c746e9f8 https://zapier.com/apps/gmail/integrations/google-sheets/2618/save-new-gmail-emails-matching-certain-traits-to-a-google-spreadsheet multiple
231 cbf5fbda-425e-4619-bcf2-0ea8d4c0bfa3 https://zapier.com/apps/google-sheets/integrations/google-slides/13919/refresh-charts-on-a-google-slides-presentation-when-rows-are-updated-on-google-sheets multiple
232 a54284d0-7b93-4327-bfcc-3a421516dbdd https://superuser.com/questions/655622/cannot-drag-images-from-thunderbird-to-word multiple
233 58565672-7bfe-48ab-b828-db349231de6b https://superuser.com/questions/1792660/open-link-from-other-application-does-not-open-the-url-in-firefox multiple
234 6d72aad6-187a-4392-a4c4-ed87269c51cf https://superuser.com/questions/923171/converting-openoffice-impress-presentation-to-video-without-screen-recording multiple
235 937087b6-f668-4ba6-9110-60682ee33441 https://superuser.com/questions/187440/set-default-ubuntu-video-player-as-vlc multiple
236 f8cfa149-d1c1-4215-8dac-4a0932bad3c2 https://superuser.com/questions/1803088/libreoffice-calc-clears-clipboard multiple
237 5e974913-6905-4c3f-8b65-d7837f3931cc https://stackoverflow.com/questions/61856141/how-can-i-start-thunderbird-and-minimize-the-window-on-startup-in-ubuntu multiple
238 7c179dad-f1c7-4892-b53f-d1c4023d23c7 https://stackoverflow.com/questions/21155085/pasting-excel-tables-in-thunderbird-e-mail-client multiple
239 4a68b2dd-70f2-4532-9bc1-d21878bd8cb2 https://stackoverflow.com/questions/65669955/thunderbird-how-to-send-a-mail-to-all-receivers-of-a-folder multiple
240 c8457fde-b14b-4aba-b402-144842ea29e1 https://stackoverflow.com/questions/65788200/how-to-open-xlsx-files-in-ms-excel-from-vs-code multiple
241 81c425f5-78f3-4771-afd6-3d2973825947 https://www.zyxware.com/articles/3770/how-to-transfer-data-in-libreoffice-calc-to-libreoffice-writer-in-table-format multiple
242 bb83cab4-e5c7-42c7-a67b-e46068032b86 https://ask.libreoffice.org/t/save-impress-presentation-as-writer-document/5291/4 multiple
243 227d2f97-562b-4ccb-ae47-a5ec9e142fbb https://discourse.gnome.org/t/gimp-and-libre-office-writer/15430/4 multiple
244 a6bbc08c-51e9-4ee4-9327-83d05075d960 https://forum.openoffice.org/en/forum/viewtopic.php?t=105055 multiple
245 964e6e03-ba31-466b-8c15-5a351a81f675 https://www.maketecheasier.com/mail-merge-thunderbird-calc/ multiple
246 2fe4b718-3bd7-46ec-bdce-b184f5653624 https://www.thewindowsclub.com/how-to-create-animated-gif-from-a-video-file-using-vlc-and-gimp multiple
247 d02b9364-6bb0-4c7e-9dbd-4db62822bc26 https://stackoverflow.com/questions/38306910/simple-python-script-to-get-a-libreoffice-base-field-and-play-on-vlc multiple
248 57fb469b-127a-46fa-8281-bbb3840efdf5 https://support.mozilla.org/en-US/questions/1150626 multiple
249 3680a5ee-6870-426a-a997-eba929a0d25c https://unix.stackexchange.com/questions/510850/how-to-open-calc-from-terminal-and-insert-files multiple
250 2d8c8a20-6f54-4c2e-ad56-61fbe7af6b78 https://www.quora.com/How-do-I-force-LibreOffice-Calc-to-recalculate-a-spreadsheet-from-the-command-line multiple
251 ee9a3c83-f437-4879-8918-be5efbb9fac7 https://stackoverflow.com/questions/64589140/convert-ods-to-csv-using-command-line-when-libreoffice-instance-is-running multiple
252 f7dfbef3-7697-431c-883a-db8583a4e4f9 https://www.thegeekdiary.com/libreoffice-command-examples-in-linux/ multiple
253 2b9493d7-49b8-493a-a71b-56cd1f4d6908 https://devicetests.com/kill-libreoffice-writer-command-line-ubuntu multiple
254 51f5801c-18b3-4f25-b0c3-02f85507a078 https://github.com/danielrcollins1/ImpressExtractNotes multiple
255 81de345e-5473-4cb6-a74d-b6abf3475a6a https://stackoverflow.com/questions/45588952/how-can-i-compose-and-send-email-in-thunderbird-from-commandline multiple
256 2c9fc0de-3ee7-45e1-a5df-c86206ad78b5 https://nikki-ricks.medium.com/how-to-use-git-add-commit-and-push-in-vs-code-and-command-line-35c0e8c47b62 multiple
257 510f64c8-9bcc-4be1-8d30-638705850618 https://www.geeksforgeeks.org/how-to-start-vs-code-from-the-terminal-command-line/ multiple
258 9ff484f7-5c09-4398-ae29-d5904e59e138 https://stackoverflow.com/questions/38606973/playing-opening-and-pausing-vlc-command-line-executed-from-python-scripts multiple
259 d9b7c649-c975-4f53-88f5-940b29c47247 https://marketplace.uipath.com/listings/extract-the-first-1000-gmail-emails-from-the-current-month-in-a-new-google-sheets-report multiple
260 be4ef0dc-0f70-4936-81d8-3cd2b04482f8 https://marketplace.uipath.com/listings/table-data-extraction-for-sales-opportunities-to-excel-workbook multiple
261 78aed49a-a710-4321-a793-b611a7c5b56b https://marketplace.uipath.com/listings/upload-email-attachments-from-gmail-to-google-drive multiple
262 897e3b53-5d4d-444b-85cb-2cdc8a97d903 https://marketplace.uipath.com/listings/convert-word-file-to-pdf-and-store-in-onedrive multiple
263 4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc https://marketplace.uipath.com/listings/extract-data-from-a-new-invoice-file-in-google-drive-and-store-it-in-google-sheets4473 multiple
264 b52b40a5-ad70-4c53-b5b0-5650a8387052 https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive multiple
265 46407397-a7d5-4c6b-92c6-dbe038b1457b https://marketplace.uipath.com/listings/upload-to-google-drive-images-from-pdf-attachments-received-via-gmail multiple
266 a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb https://marketplace.uipath.com/listings/backup-important-emails-to-onedrive-or-sharepoint multiple
267 665f4af1-617d-4009-baff-84ff66071e6a https://www.howtogeek.com/663927/how-to-open-google-chrome-using-command-prompt-on-windows-10/#open-chrome-straight-to-a-specific-website multiple
268 e6313b30-3903-4ed9-8c7d-4c47bf51fc96 https://stackoverflow.com/questions/12258086/how-do-i-run-google-chrome-as-root multiple

View File

@@ -0,0 +1,234 @@
import csv
import os
import yt_dlp
from docx import Document
import requests
from bs4 import BeautifulSoup
from PIL import Image
import pytesseract
from io import BytesIO
from docx import Document
import re
import markdownify
from markdownify import markdownify as md
def valid_xml_char_ordinal(c):
    """Return True if character *c* is allowed in an XML 1.0 document."""
    cp = ord(c)
    # Tab, LF and CR are the only legal control characters.
    if cp in (0x9, 0xA, 0xD):
        return True
    # Basic plane (minus surrogates), private-use tail, supplementary planes.
    if 0x20 <= cp <= 0xD7FF:
        return True
    return 0xE000 <= cp <= 0xFFFD or 0x10000 <= cp <= 0x10FFFF
def download_and_clean_youtube_subtitles(video_url, txt_filepath):
    """Download the English subtitles of *video_url* with yt-dlp, strip VTT
    headers/timestamps and duplicate lines, and save the plain text to
    *txt_filepath*.
    """
    # yt-dlp will write '<base>.en.vtt'; derive <base> by dropping '.txt'.
    subtitles_path = txt_filepath[0:-4]
    ydl_opts = {
        'skip_download': True,
        'writesubtitles': True,
        'writeautomaticsub': True,  # fall back to auto-generated captions
        'subtitleslangs': ['en'],
        'outtmpl': f'{subtitles_path}.%(ext)s',
        'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # fetch only the subtitle track, not the video itself
        ydl.download([video_url])
    subtitle_file = f'{subtitles_path}.en.vtt'
    # Lines to discard: cue timestamps and the WEBVTT/Kind/Language header.
    skip_re = re.compile(r'(\d{2}:\d{2}:\d{2}.\d{3} --> \d{2}:\d{2}:\d{2}.\d{3})|(^WEBVTT)|(^Kind: captions)|(^Language: .*)')
    cleaned = []
    try:
        with open(subtitle_file, 'r', encoding='utf-8') as src:
            for raw in src:
                text = raw.strip()
                # keep only non-blank caption text
                if text and not skip_re.match(raw):
                    cleaned.append(text)
        # drop duplicates while preserving first-seen order
        unique_lines = list(dict.fromkeys(cleaned))
        with open(txt_filepath, 'w', encoding='utf-8') as dst:
            dst.writelines(line + '\n' for line in unique_lines if line)
    except IOError:
        print(f"Could not read file: {subtitle_file}")
# scrape a webpage and perform OCR on images
def scrape_and_ocr_forum(url, doc):
    """Append the textual content of the forum page at *url* to the docx
    document *doc*, OCR-ing embedded .svg/.png images and appending the
    recognized text as well.
    """
    page = BeautifulSoup(requests.get(url).content, 'html.parser')
    # copy headings, paragraphs and list items verbatim
    for node in page.find_all(['h1', 'h2', 'h3', 'p', 'li']):
        doc.add_paragraph(node.get_text())
    for img in page.find_all('img'):
        if 'src' not in img.attrs:
            continue
        src = img['src']
        # only absolute http(s) URLs are fetched
        if not src.startswith('http'):
            continue
        # restrict OCR to the formats this pipeline expects
        if not (src.endswith('.svg') or src.endswith('.png')):
            continue
        # skip the site logo asset
        if 'neveragain.allstatics.com/2019/assets/icon/logo' in src:
            continue
        picture = Image.open(BytesIO(requests.get(src, stream=True).content))
        ocr_text = pytesseract.image_to_string(picture)
        if ocr_text not in (' ', ''):
            # strip characters that are illegal in the docx XML payload
            doc.add_paragraph(''.join(ch for ch in ocr_text if valid_xml_char_ordinal(ch)))
def superuser_to_markdown(url, doc_filepath):
    """Convert a Superuser question page to a Markdown file.

    Downloads *url*, renders the question title/body and every answer as
    Markdown, OCRs embedded images into fenced code blocks, and writes the
    result to *doc_filepath*.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    # set up the markdown document
    markdown_content = ""
    # get the question title and body
    question_title = soup.find('h1').get_text(strip=True)
    question = soup.find('div', {'id': 'question'})
    if question:
        question_body = question.find('div', {'class': 's-prose js-post-body'}).prettify()
        markdown_content += f"# {question_title}\n\n" + markdownify.markdownify(question_body, heading_style="ATX") + "\n\n"
    # get all answers
    answers = soup.find_all('div', {'class': 'answer'})
    for answer in answers:
        answer_body = answer.find('div', {'class': 's-prose js-post-body'}).prettify()
        markdown_content += markdownify.markdownify(answer_body, heading_style="ATX") + "\n\n"
    # deal with images and perform OCR
    # BUG FIX: the original called question.find_all('img') unconditionally,
    # crashing with AttributeError whenever no question div was found.
    question_imgs = question.find_all('img') if question else []
    all_img_tags = question_imgs + [img for answer in answers for img in answer.find_all('img')]
    for img_tag in all_img_tags:
        image_src = img_tag.get('src') or img_tag.get('data-src')  # Superuser uses lazy loading
        if image_src and image_src.startswith('http'):
            img_response = requests.get(image_src, stream=True)
            img = Image.open(BytesIO(img_response.content))
            ocr_text = pytesseract.image_to_string(img)
            if ocr_text.strip():  # if the OCR result is not empty, add it to the markdown content
                markdown_content += "```\n" + ocr_text.strip() + "\n```\n\n"
    with open(doc_filepath, 'w', encoding='utf-8') as f:
        f.write(markdown_content)
def stack_overflow_to_markdown(url, doc_filepath):
    """Convert a Stack Overflow question page to a Markdown file.

    Downloads *url*, renders the question title/body and every answer as
    Markdown, OCRs http(s) .svg/.png images into fenced code blocks, and
    writes the result to *doc_filepath*.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    # set up the markdown document
    markdown_content = ""
    # get the question title and body
    question = soup.find('div', {'id': 'question'})
    question_title = soup.find('h1').get_text(strip=True)
    if question:
        question_body = question.find('div', {'class': 's-prose js-post-body'}).prettify()
        markdown_content += f"# {question_title}\n\n" + markdownify.markdownify(question_body, heading_style="ATX") + "\n\n"
    # get all answers
    answers = soup.find_all('div', {'class': 'answer'})
    for answer in answers:
        answer_body = answer.find('div', {'class': 's-prose js-post-body'}).prettify()
        markdown_content += markdownify.markdownify(answer_body, heading_style="ATX") + "\n\n"
    # deal with images and perform OCR
    all_img_tags = soup.find_all('img')
    for img_tag in all_img_tags:
        # BUG FIX: the original indexed img_tag['src'] directly, raising
        # KeyError for <img> tags without a src attribute (e.g. lazy-loaded
        # images); .get() lets us skip them instead.
        image_url = img_tag.get('src')
        # make sure the image URL is valid before fetching it
        if image_url and image_url.startswith('http') and (image_url.endswith('.svg') or image_url.endswith('.png')):
            img_response = requests.get(image_url, stream=True)
            img = Image.open(BytesIO(img_response.content))
            ocr_text = pytesseract.image_to_string(img)
            if ocr_text.strip():
                markdown_content += "```\n" + ocr_text.strip() + "\n```\n\n"
    with open(doc_filepath, 'w', encoding='utf-8') as f:
        f.write(markdown_content)
def scrape_webpage_to_markdown(url, doc_filepath):
    """Convert a generic article/forum page at *url* to a Markdown file.

    Looks for <article>, <main>, or Lithium forum message bodies; converts
    their HTML to Markdown, OCR-ing embedded .svg/.png images into fenced
    plaintext blocks, and writes the result to *doc_filepath*. Returns
    without writing when no content container is found.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    articles = soup.find_all('article') or soup.find_all('main') or soup.find_all('div', {'class': 'lia-message-body-content'})
    if not articles:
        return
    markdown_content = ''
    # scrape the webpage and perform OCR on images
    for article in articles:
        for child in article.recursiveChildGenerator():
            # if this is an image, perform OCR
            if child.name == 'img':
                img_url = child.get('src')
                # BUG FIX: an <img> with no src attribute made the original
                # crash on None.startswith(); skip such tags instead.
                if not img_url:
                    continue
                # resolve protocol-less/relative URLs against the page URL
                if not img_url.startswith(('http:', 'https:')):
                    img_url = '{}{}'.format(url, img_url)
                if not img_url.endswith('.svg') and not img_url.endswith('.png'):
                    continue
                if 'neveragain.allstatics.com/2019/assets/icon/logo' in img_url:
                    continue
                img_response = requests.get(img_url, stream=True)
                img = Image.open(BytesIO(img_response.content))
                ocr_text = pytesseract.image_to_string(img)
                if ocr_text.strip():
                    markdown_content += '\n```plaintext\n{}\n```\n'.format(ocr_text.strip())
                continue
            # skip bare text nodes; only tag elements are converted
            if child.name is None:
                continue
            html_str = str(child)
            markdown_content += md(html_str) + '\n\n'
    with open(doc_filepath, 'w', encoding='utf-8') as f:
        f.write(markdown_content)
# process a URL and save the file
def process_url(url, doc_id, app):
    """Dispatch *url* to the matching scraper and save the result under
    SourceDoc as '<doc_id>_<app>.md' (or '.txt' for YouTube subtitles).
    """
    doc_filepath = f"/content/drive/MyDrive/SourceDoc/{doc_id}_{app}.md"
    txt_filepath = f"/content/drive/MyDrive/SourceDoc/{doc_id}_{app}.txt"
    # FIX: removed an unused `Document()` instantiation — none of the
    # scrapers below take a docx document.
    if 'youtube.com' in url or 'youtu.be' in url:
        download_and_clean_youtube_subtitles(url, txt_filepath)
    elif 'superuser.com' in url:
        superuser_to_markdown(url, doc_filepath)
    elif 'stackoverflow.com' in url:
        stack_overflow_to_markdown(url, doc_filepath)
    else:
        scrape_webpage_to_markdown(url, doc_filepath)
# read the CSV file and process each URL, resuming after rows already done
csv_filepath = './Get_Source_Doc - Sheet1.csv'
with open(csv_filepath, 'r', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    rows_to_skip = 55  # number of leading rows processed in a previous run
    for row in reader:
        if rows_to_skip > 0:
            rows_to_skip -= 1
            continue
        process_url(row['Source'], row['id'], row['InvolvedApp'])
        print(row)