From 105ab4a04b0fa6585e3c8da62dd62446011f3516 Mon Sep 17 00:00:00 2001 From: Showdown76py Date: Mon, 19 May 2025 16:10:02 +0200 Subject: [PATCH] feat: wip: give OCR+positions --- ai/processor.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/ai/processor.py b/ai/processor.py index 50c4888..999afb1 100644 --- a/ai/processor.py +++ b/ai/processor.py @@ -51,17 +51,6 @@ class AIProcessor: aic.Message(role="user", content=prompt, image=img_data) ) # if image provided, perform OCR and include text positions - if img_data is not None: - # decode base64 if needed - try: - img_bytes = base64.b64decode(img_data) if isinstance(img_data, str) else img_data - ocr_results = ai.compute.perform_ocr(img_bytes) - # append OCR results as a tool message - self.session.messages.append( - aic.Message(role="tool", name="ocr", content=json.dumps(ocr_results)) - ) - except Exception as e: - logger.debug("OCR failed: %s", e) response = self.oai.chat.completions.create( model=self.model, messages=self.session.messages_dict(), @@ -113,6 +102,17 @@ class AIProcessor: "Shortened message copies for processing: %s", cps ) if reexec: + img_bytes = ai.compute.take_screenshot(cross_position=click_positions) + img = ai.compute.screenshot_to_base64( + img_bytes + ) + + ocr_results = [] + try: + ocr_results = ai.compute.perform_ocr(img_bytes) + except Exception as e: + logger.debug("OCR failed: %s", e) + self.session.messages.append( aic.Message( role="assistant", @@ -120,13 +120,7 @@ class AIProcessor: ) ) - img = ai.compute.screenshot_to_base64( - ai.compute.take_screenshot(cross_position=click_positions) - ) - - - - outputs.extend( self.process(nextsteps, img) ) + outputs.extend( self.process(nextsteps+f"\nOCR Positions: {ocr_results}", img) ) return [ { "name": tc.function.name,