120 lines
4.4 KiB
Python
120 lines
4.4 KiB
Python
import pyautogui
|
|
import threading
|
|
import time, io, base64
|
|
import sys
|
|
# Try PyQt5 for transparent, click-through overlay
|
|
try:
|
|
from PyQt5 import QtWidgets, QtCore, QtGui
|
|
PYQT_AVAILABLE = True
|
|
except ImportError:
|
|
import tkinter as tk
|
|
import sys
|
|
if sys.platform == 'darwin':
|
|
try:
|
|
from AppKit import NSApplication, NSPanel, NSColor, NSBezierPath, NSView, NSWindowStyleMaskBorderless, NSBackingStoreBuffered, NSStatusWindowLevel, NSWindowCollectionBehaviorCanJoinAllSpaces
|
|
COCOA_AVAILABLE = True
|
|
except ImportError:
|
|
COCOA_AVAILABLE = False
|
|
else:
|
|
COCOA_AVAILABLE = False
|
|
PYQT_AVAILABLE = False
|
|
from objects.inputs import MouseInput, KeyboardInput, ButtonType
|
|
from PIL import ImageGrab # type: ignore
|
|
|
|
def take_screenshot() -> bytes:
    """Capture the screen and return the image encoded as PNG bytes.

    The image is whatever ``ImageGrab.grab()`` returns when called with its
    default arguments; it is serialised as PNG into an in-memory buffer.
    """
    png_buffer = io.BytesIO()
    ImageGrab.grab().save(png_buffer, format='PNG')
    return png_buffer.getvalue()
|
|
|
|
def screenshot_to_base64(screenshot: bytes) -> str:
    """Return the base64 text form of the given screenshot bytes."""
    encoded = base64.b64encode(screenshot)
    return str(encoded, 'utf-8')
|
|
|
|
def show_click_indicator(x: int, y: int, duration: float = 2.0, size: int = 50) -> None:
    """Placeholder for the on-screen click indicator; currently does nothing.

    The overlay implementation (a red circle drawn at ``(x, y)`` for
    ``duration`` seconds, ``size`` pixels across) is commented out below, so
    every argument is accepted and ignored.
    """
    # --- Disabled implementation, kept for reference -----------------------
    # Try PyQt5 overlay first, else fall back to tkinter.
    #
    # half = size // 2
    # if PYQT_AVAILABLE:
    #     # Setup QApplication
    #     app = QtWidgets.QApplication.instance() or QtWidgets.QApplication(sys.argv)
    #     # Frameless, transparent, click-through window
    #     w = QtWidgets.QWidget()
    #     flags = QtCore.Qt.FramelessWindowHint | QtCore.Qt.WindowStaysOnTopHint | QtCore.Qt.Tool
    #     w.setWindowFlags(flags)
    #     w.setAttribute(QtCore.Qt.WA_TranslucentBackground)
    #     w.setAttribute(QtCore.Qt.WA_TransparentForMouseEvents)
    #     w.setGeometry(x-half, y-half, size, size)
    #     # Draw circle
    #     pixmap = QtGui.QPixmap(size, size)
    #     pixmap.fill(QtCore.Qt.transparent)
    #     painter = QtGui.QPainter(pixmap)
    #     pen = QtGui.QPen(QtGui.QColor('red'), 4)
    #     painter.setPen(pen)
    #     painter.drawEllipse(2, 2, size-4, size-4)
    #     painter.end()
    #     label = QtWidgets.QLabel(w)
    #     label.setPixmap(pixmap)
    #     w.show()
    #     app.processEvents()
    #     time.sleep(duration)
    #     w.close()
    # else:
    #     # Fallback tkinter overlay (may intercept clicks)
    #     root = tk.Tk()
    #     root.overrideredirect(True)
    #     root.attributes('-topmost', True)
    #     root.attributes('-alpha', 0.5)
    #     root.geometry(f"{size}x{size}+{x-half}+{y-half}")
    #     canvas = tk.Canvas(root, width=size, height=size, highlightthickness=0, bg='white')
    #     canvas.pack()
    #     canvas.create_oval(2, 2, size-2, size-2, outline='red', width=4)
    #     root.update()
    #     time.sleep(duration)
    #     root.destroy()
    # -----------------------------------------------------------------------
    return None
|
|
|
|
def press_mouse(mouse_input: MouseInput) -> None:
    """Click at the screen position carried by ``mouse_input``.

    ``mouse_input.click_type`` selects the action: ``"left"``, ``"right"`` and
    ``"middle"`` issue a single click with that button, while
    ``"double_left"`` issues a double click. Any other value performs no
    click. In every case ``show_click_indicator`` is then started on a daemon
    thread for the same coordinates.
    """
    pos_x = mouse_input.x
    pos_y = mouse_input.y
    kind = mouse_input.click_type

    if kind == "double_left":
        pyautogui.doubleClick(pos_x, pos_y)
    else:
        # Single-click variants differ only in which button is passed through.
        for single in ("left", "right", "middle"):
            if kind == single:
                pyautogui.click(pos_x, pos_y, button=single)
                break

    # Run the indicator off-thread so this call does not wait on it.
    indicator = threading.Thread(
        target=show_click_indicator,
        args=(pos_x, pos_y),
        daemon=True,
    )
    indicator.start()
|
|
|
|
def press_keyboard(keyboard_input: KeyboardInput) -> None:
    """Type ``keyboard_input.text`` and optionally press Enter.

    Nothing is typed when the text is empty or otherwise falsy. Enter is
    pressed afterwards whenever ``keyboard_input.press_enter`` is truthy.
    """
    typed = keyboard_input.text
    if typed:
        pyautogui.typewrite(typed)

    # NOTE(review): treated as independent of the text check above, so Enter
    # can be sent even when no text was typed — confirm this is intended.
    if keyboard_input.press_enter:
        pyautogui.press('enter')
|
|
|
|
def wait(duration: float) -> None:
    """Pause the calling thread for ``duration`` seconds via ``time.sleep``."""
    seconds = duration
    time.sleep(seconds)
|
|
|
|
def reprompt(nextsteps: str, processor) -> object:
    """Take a fresh screenshot and pass it to ``processor`` with ``nextsteps``.

    Args:
        nextsteps: Forwarded unchanged as the first positional argument of
            ``processor.process``.
        processor: Object exposing ``process(nextsteps, img_data=...)``.

    Returns:
        Whatever ``processor.process`` returns. The previous ``-> None``
        annotation contradicted this, since the result is returned to the
        caller.
    """
    # The screenshot is sent as base64 text rather than raw PNG bytes.
    encoded_screenshot = screenshot_to_base64(take_screenshot())
    return processor.process(nextsteps, img_data=encoded_screenshot)
|
|
|
|
|
|
def _execute(name, args, processor):
|
|
if name == "click_button":
|
|
press_mouse(MouseInput(**args))
|
|
elif name == "type_text":
|
|
press_keyboard(KeyboardInput(**args))
|
|
elif name == "wait":
|
|
wait(**args)
|
|
elif name == "reprompt":
|
|
reprompt(**args, processor=processor)
|