diff --git a/ai/compute.py b/ai/compute.py
index 554a7ba..ad534b8 100644
--- a/ai/compute.py
+++ b/ai/compute.py
@@ -5,6 +5,9 @@ import time, io, base64
 import sys
 from objects.inputs import MouseInput, KeyboardInput, ButtonType
 from PIL import ImageGrab, ImageDraw # type: ignore
+from objects import logger as logger_module
+import logging
+logger: logging.Logger = logger_module.get_logger(__name__)
 
 def take_screenshot(cross_position: list[tuple[int, int]] | None = None) -> bytes:
     """Take a screenshot of the current screen and return it as bytes."""
@@ -47,6 +50,23 @@ def perform_ocr(screenshot: bytes) -> list[dict]:
-                'x': data['width'][i] + data['left'][i] // 2, # center x position
-                'y': data['height'][i] + data['top'][i] // 2 # center y position
+                'x': data['left'][i] + data['width'][i] // 2, # center x position
+                'y': data['top'][i] + data['height'][i] // 2 # center y position
             })
+
+    # When debug logging is enabled for this module's logger, save a copy of
+    # the screenshot with a blue cross drawn at the (x, y) of each OCR result.
+    if logger.isEnabledFor(logging.DEBUG):
+        annotated = Image.open(io.BytesIO(screenshot))
+        draw = ImageDraw.Draw(annotated)
+        size = 10  # half-length of each cross arm, in pixels
+        color = (0, 0, 255)  # blue
+        width = 2  # stroke width of the cross lines
+        for result in results:
+            x, y = result['x'], result['y']
+            # horizontal arm of the cross
+            draw.line((x - size, y, x + size, y), fill=color, width=width)
+            # vertical arm of the cross
+            draw.line((x, y - size, x, y + size), fill=color, width=width)
+        logger.debug("Debug, saving ocr results screenshot with circles")
+        annotated.save("ocr_results.png", format='PNG')
     return results
 
 def screenshot_to_base64(screenshot: bytes) -> str: