Compare commits
9 Commits
93a01b792b
...
main
Author | SHA1 | Date | |
---|---|---|---|
6b13586154 | |||
7192f4bc18 | |||
36cfeffe9c | |||
7f5296b2ef | |||
e5b3ea8b57 | |||
ff7c362cfe | |||
b035bee682 | |||
c2fb041285 | |||
4369611610 |
@@ -5,6 +5,9 @@ import time, io, base64
|
||||
import sys
|
||||
from objects.inputs import MouseInput, KeyboardInput, ButtonType
|
||||
from PIL import ImageGrab, ImageDraw # type: ignore
|
||||
from objects import logger as logger_module
|
||||
import logging
|
||||
logger: logging.Logger = logger_module.get_logger(__name__)
|
||||
|
||||
def take_screenshot(cross_position: list[tuple[int, int]] | None = None) -> bytes:
|
||||
"""Take a screenshot of the current screen and return it as bytes."""
|
||||
@@ -42,11 +45,31 @@ def perform_ocr(screenshot: bytes) -> list[dict]:
|
||||
for i in range(n):
|
||||
text = data['text'][i]
|
||||
if text and text.strip():
|
||||
# Fix the center-point calculation (origin plus half the extent)
|
||||
results.append({
|
||||
'text': text,
|
||||
'x': data['width'][i] + data['left'][i] // 2, # center x position
|
||||
'y': data['height'][i] + data['top'][i] // 2 # center y position
|
||||
'x': data['left'][i] + data['width'][i] // 2,
|
||||
'y': data['top'][i] + data['height'][i] // 2
|
||||
})
|
||||
|
||||
# check if debug is enabled
|
||||
if logger.isEnabledFor(logging.DEBUG):
|
||||
# open the screenshot and draw a blue cross at the (x, y) center of each OCR result
|
||||
screenshot_with_circles = Image.open(io.BytesIO(screenshot))
|
||||
draw = ImageDraw.Draw(screenshot_with_circles)
|
||||
for result in results:
|
||||
x, y = result['x'], result['y']
|
||||
size = 10
|
||||
color = (0, 0, 255) # blue
|
||||
width = 2
|
||||
# horizontal line
|
||||
draw.line((x - size, y, x + size, y), fill=color, width=width)
|
||||
# vertical line
|
||||
draw.line((x, y - size, x, y + size), fill=color, width=width)
|
||||
screenshot_with_circles.save("screenshot_with_circles.png", format='PNG')
|
||||
# save in a file
|
||||
logger.debug("Debug, saving ocr results screenshot with circles")
|
||||
screenshot_with_circles.save("ocr_results.png", format='PNG')
|
||||
return results
|
||||
|
||||
def screenshot_to_base64(screenshot: bytes) -> str:
|
||||
@@ -87,7 +110,7 @@ def wait(duration: float) -> None:
|
||||
def search_pc(query: str) -> None:
|
||||
"""Press the Windows key, wait, then type the query."""
|
||||
pyautogui.hotkey('win')
|
||||
wait(2)
|
||||
wait(4)
|
||||
press_keyboard(KeyboardInput(text=query))
|
||||
|
||||
def reprompt(nextsteps: str, processor) -> None:
|
||||
|
@@ -91,7 +91,7 @@ class AIProcessor:
|
||||
cps = [
|
||||
aic.Message(
|
||||
role=msg.role,
|
||||
content=msg.content,
|
||||
content=msg.content[:80],
|
||||
image=msg.image[:20] if isinstance(msg.image, str) else None, # type: ignore
|
||||
disable_image=msg.disable_image,
|
||||
name=msg.name,
|
||||
@@ -111,12 +111,13 @@ class AIProcessor:
|
||||
try:
|
||||
ocr_results = ai.compute.perform_ocr(img_bytes)
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
logger.debug("OCR failed: %s", e)
|
||||
|
||||
self.session.messages.append(
|
||||
aic.Message(
|
||||
role="assistant",
|
||||
content=str(tool_calls),
|
||||
content=str(((tc.function.name, tc.function.arguments) for tc in tool_calls)),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -134,7 +135,7 @@ class AIProcessor:
|
||||
output_text: str = response.choices[0].message.content # type: ignore
|
||||
outputs.append(output_text)
|
||||
self.session.messages.append(
|
||||
aic.Message(role="assistant", content=output_text)
|
||||
aic.Message(role="assistant", content="Executed: " + (str(*outputs)))
|
||||
)
|
||||
|
||||
return [*outputs]
|
||||
|
@@ -31,7 +31,7 @@ FUNCTIONS = [
|
||||
"click_type": {
|
||||
"type": "string",
|
||||
"enum": ["left", "double_left", "middle", "right"],
|
||||
"description": "The type of mouse click to perform. `double_left` is a double click."
|
||||
"description": "The type of mouse click to perform. `double_left` is a double click, used to open apps or files."
|
||||
}
|
||||
},
|
||||
"required": ["click_type", "x", "y"],
|
||||
|
Reference in New Issue
Block a user