# gpt-agent/ai/compute.py
#
# 65 lines
# 2.2 KiB
# Python

import pyautogui
import threading
import time, io, base64
import sys
from objects.inputs import MouseInput, KeyboardInput, ButtonType
from PIL import ImageGrab # type: ignore
def take_screenshot() -> bytes:
    """Capture the screen and return the image encoded as PNG bytes.

    The capture comes from ``ImageGrab.grab()`` called with no arguments and
    is serialized as PNG into an in-memory ``io.BytesIO`` buffer whose
    contents are returned.
    """
    png_buffer = io.BytesIO()
    ImageGrab.grab().save(png_buffer, format='PNG')
    return png_buffer.getvalue()
def screenshot_to_base64(screenshot: bytes) -> str:
    """Return the base64 text form of raw screenshot bytes.

    The bytes are base64-encoded and the resulting ASCII payload is decoded
    as UTF-8 so the caller receives a ``str``.
    """
    encoded = base64.b64encode(screenshot)
    return str(encoded, 'utf-8')
def show_click_indicator(x: int, y: int, duration: float = 2.0, size: int = 50) -> None:
    """Placeholder for a click-through red circle shown at (x, y).

    The intended behavior is to display the marker for ``duration`` seconds
    with the given ``size``; the current implementation is a no-op that
    ignores all of its arguments.
    """
    return None
def press_mouse(mouse_input: MouseInput) -> None:
    """Click at the position carried by ``mouse_input``.

    ``mouse_input.click_type`` selects the action: ``"left"``, ``"right"``
    and ``"middle"`` produce a single click with that button, while
    ``"double_left"`` produces a double click.  Any other value performs no
    click.  Afterwards ``show_click_indicator`` is launched on a daemon
    thread for the same coordinates, using its default duration and size.
    """
    target_x = mouse_input.x
    target_y = mouse_input.y
    click_type = mouse_input.click_type

    if click_type == "double_left":
        pyautogui.doubleClick(target_x, target_y)
    else:
        # The three single-click variants differ only in the button name.
        for plain_button in ("left", "right", "middle"):
            if click_type == plain_button:
                pyautogui.click(target_x, target_y, button=plain_button)
                break

    # NOTE(review): assumes the indicator is shown on every call rather than
    # only after a middle click -- confirm against the intended indentation.
    marker_thread = threading.Thread(
        target=show_click_indicator,
        args=(target_x, target_y),
        daemon=True,
    )
    marker_thread.start()
def press_keyboard(keyboard_input: KeyboardInput) -> None:
    """Type ``keyboard_input.text`` and optionally press Enter.

    Falsy text (for example an empty string or ``None``) is skipped.  When
    ``keyboard_input.press_enter`` is truthy, the ``enter`` key is pressed
    once after any typing.
    """
    to_type = keyboard_input.text
    if to_type:
        pyautogui.typewrite(to_type)

    # NOTE(review): assumes Enter is pressed independently of whether any
    # text was typed -- confirm against the intended indentation.
    if not keyboard_input.press_enter:
        return
    pyautogui.press('enter')
def wait(duration: float) -> None:
    """Pause execution for ``duration`` seconds by delegating to ``time.sleep``."""
    pause_seconds = duration
    time.sleep(pause_seconds)
    return None
def reprompt(nextsteps: str, processor):
    """Send ``nextsteps`` to the processor together with a fresh screenshot.

    Args:
        nextsteps: Text passed as the first positional argument of
            ``processor.process``.
        processor: Object exposing ``process(text, img_data=...)``.

    Returns:
        Whatever ``processor.process`` returns.  The return annotation is
        intentionally left open: the function forwards the processor's
        result, so annotating it as ``None`` would be incorrect.
    """
    # Capture the screen at call time so the processor sees the current state.
    encoded_screenshot = screenshot_to_base64(take_screenshot())
    return processor.process(nextsteps, img_data=encoded_screenshot)
def _execute(name, args, processor):
    """Run the action identified by ``name`` with keyword arguments ``args``.

    Recognized names are ``"click_button"``, ``"type_text"``, ``"wait"`` and
    ``"reprompt"``; ``args`` is unpacked with ``**`` into the matching input
    object or function.  Any other name is ignored.  The function always
    returns ``None`` -- the value returned by ``reprompt`` is not propagated.
    """
    if name == "click_button":
        press_mouse(MouseInput(**args))
        return
    if name == "type_text":
        press_keyboard(KeyboardInput(**args))
        return
    if name == "wait":
        wait(**args)
        return
    if name == "reprompt":
        # Only this action needs the processor.
        reprompt(**args, processor=processor)