From d7c4f9b0cb65fb19fa5a165e4d807a6af000e549 Mon Sep 17 00:00:00 2001 From: Showdown76py Date: Mon, 19 May 2025 13:27:16 +0200 Subject: [PATCH] fix: Update image handling in process method; ensure only the last two messages retain images and improve debugging output --- ai/processor.py | 9 +++++---- objects/aic.py | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/ai/processor.py b/ai/processor.py index ecfcf79..7da90fc 100644 --- a/ai/processor.py +++ b/ai/processor.py @@ -75,16 +75,17 @@ class AIProcessor: processor=self, ) outputs.append(r) if r else None - # Make sure the two last messages from user has an image, but set disable_image to True for the others - for msg in self.session.messages[:-2]: - msg.image = None + # Make sure every image except the last two is removed + for msg in self.session.messages[:-3]: + if msg.image and not msg.disable_image: + msg.image = None # copy of self.session.messages, but shorten the image data for better debugging cps = [ aic.Message( role=msg.role, content=msg.content, image=msg.image[:20] if isinstance(msg.image, str) else None, # type: ignore - disable_image=True, + disable_image=msg.disable_image, name=msg.name, ) for msg in self.session.messages diff --git a/objects/aic.py b/objects/aic.py index 48ae31f..a1019bd 100644 --- a/objects/aic.py +++ b/objects/aic.py @@ -8,7 +8,8 @@ SYSTEM_PROMPT = """ You are CopeAI Windows Agent. You are currently controlling a Windows 11 machine. \ You are capable to see the screen, click buttons, type text, and interact with the system. \ You will use the functions provided. The resolution of the machine is 1920x1080. \ -Your text response must indicate what you are doing.""" +Your text response must indicate what you are doing. 
If the place where you clicked seems incorrect, \ +you will use everything you can to find the position of the location of the goal and click again.""" FUNCTIONS = [ { @@ -30,7 +31,7 @@ FUNCTIONS = [ "click_type": { "type": "string", "enum": ["left", "double_left", "middle", "right"], - "description": "The type of mouse click to perform." + "description": "The type of mouse click to perform. `double_left` is a double click." } }, "required": ["click_type", "x", "y"],