"""Screen-capture and input-automation helpers driven by model tool calls."""

import base64
import io
import sys
import threading
import time

import pyautogui

# Optional GUI back-ends that a click-indicator overlay could use.  Both flags
# get a default up front so they are defined no matter which import path
# below succeeds.
PYQT_AVAILABLE = False
COCOA_AVAILABLE = False

# Try PyQt5 for a transparent, click-through overlay.
try:
    from PyQt5 import QtWidgets, QtCore, QtGui
    PYQT_AVAILABLE = True
except ImportError:
    # Fall back to tkinter; on macOS additionally probe for AppKit.
    import tkinter as tk
    if sys.platform == 'darwin':
        try:
            from AppKit import (
                NSApplication,
                NSPanel,
                NSColor,
                NSBezierPath,
                NSView,
                NSWindowStyleMaskBorderless,
                NSBackingStoreBuffered,
                NSStatusWindowLevel,
                NSWindowCollectionBehaviorCanJoinAllSpaces,
            )
            COCOA_AVAILABLE = True
        except ImportError:
            COCOA_AVAILABLE = False

from objects.inputs import MouseInput, KeyboardInput, ButtonType
from PIL import ImageGrab  # type: ignore


def take_screenshot() -> bytes:
    """Grab the current screen and return it as PNG-encoded bytes."""
    image = ImageGrab.grab()
    png_buffer = io.BytesIO()
    image.save(png_buffer, format='PNG')
    return png_buffer.getvalue()


def screenshot_to_base64(screenshot: bytes) -> str:
    """Return *screenshot* encoded as a base64 string (decoded as UTF-8)."""
    encoded = base64.b64encode(screenshot)
    return encoded.decode('utf-8')


def show_click_indicator(x: int, y: int, duration: float = 2.0, size: int = 50) -> None:
    """Placeholder for a visual marker at a click position; currently a no-op.

    The earlier overlay implementations (a PyQt5 frameless window with a
    tkinter fallback) were disabled in the source, so calling this function
    has no effect.  The signature is kept so that existing callers, such as
    ``press_mouse``, keep working.

    Args:
        x: Horizontal coordinate of the click (unused).
        y: Vertical coordinate of the click (unused).
        duration: Time in seconds the indicator was meant to stay visible
            (unused).
        size: Diameter in pixels the indicator was meant to have (unused).
    """
    return None


def press_mouse(mouse_input: MouseInput) -> None:
    """Perform the mouse click described by *mouse_input*.

    ``mouse_input.click_type`` selects the action: ``"left"``,
    ``"double_left"``, ``"right"`` or ``"middle"``.  Any other value performs
    no click.  The click-indicator hook is invoked in every case.
    """
    x, y = mouse_input.x, mouse_input.y
    click_type = mouse_input.click_type

    # Plain equality comparisons are kept (rather than a dict lookup) so that
    # click types which compare equal to these strings still match even if
    # they hash differently.
    if click_type == "left":
        pyautogui.click(x, y, button='left')
    elif click_type == "double_left":
        pyautogui.doubleClick(x, y)
    elif click_type == "right":
        pyautogui.click(x, y, button='right')
    elif click_type == "middle":
        pyautogui.click(x, y, button='middle')

    # Run the click-indicator hook off the calling thread so it can never
    # delay the next action (the hook itself is currently a no-op).
    indicator_thread = threading.Thread(
        target=show_click_indicator,
        args=(x, y),
        daemon=True,
    )
    indicator_thread.start()


def press_keyboard(keyboard_input: KeyboardInput) -> None:
    """Type ``keyboard_input.text`` and optionally press Enter afterwards.

    Nothing is typed when the text is empty or ``None``; Enter is pressed
    only when ``keyboard_input.press_enter`` is truthy.
    """
    text = keyboard_input.text
    if text:
        pyautogui.typewrite(text)
    if keyboard_input.press_enter:
        pyautogui.press('enter')


def wait(duration: float) -> None:
    """Block the calling thread for *duration* seconds."""
    time.sleep(duration)


def reprompt(nextsteps: str, processor):
    """Take a fresh screenshot and hand it to *processor* with *nextsteps*.

    Args:
        nextsteps: Text forwarded as the first argument of
            ``processor.process``.
        processor: Object exposing ``process(nextsteps, img_data=...)``.

    Returns:
        Whatever ``processor.process`` returns.
    """
    encoded_screenshot = screenshot_to_base64(take_screenshot())
    return processor.process(nextsteps, img_data=encoded_screenshot)


def _execute(name: str, args: dict, processor) -> None:
    """Dispatch a named action to the matching helper.

    Args:
        name: Action name: ``"click_button"``, ``"type_text"``, ``"wait"`` or
            ``"reprompt"``.  Any other name is ignored.
        args: Keyword arguments for the action, unpacked into ``MouseInput``,
            ``KeyboardInput``, ``wait`` or ``reprompt`` respectively.
        processor: Passed through to ``reprompt``; unused by other actions.
    """
    if name == "click_button":
        press_mouse(MouseInput(**args))
    elif name == "type_text":
        press_keyboard(KeyboardInput(**args))
    elif name == "wait":
        wait(**args)
    elif name == "reprompt":
        reprompt(**args, processor=processor)