fix: Update image handling in process method; ensure only the last two messages retain images and improve debugging output

This commit is contained in:
Showdown76 2025-05-19 13:27:16 +02:00
parent 035252c146
commit d7c4f9b0cb
2 changed files with 8 additions and 6 deletions

View File

@ -75,16 +75,17 @@ class AIProcessor:
processor=self, processor=self,
) )
outputs.append(r) if r else None outputs.append(r) if r else None
# Make sure the last two messages from the user have an image, but set disable_image to True for the others # Make sure every image except the last two is removed
for msg in self.session.messages[:-2]: for msg in self.session.messages[:-3]:
msg.image = None if msg.image and not msg.disable_image:
msg.image = None
# copy of self.session.messages, but shorten the image data for better debugging # copy of self.session.messages, but shorten the image data for better debugging
cps = [ cps = [
aic.Message( aic.Message(
role=msg.role, role=msg.role,
content=msg.content, content=msg.content,
image=msg.image[:20] if isinstance(msg.image, str) else None, # type: ignore image=msg.image[:20] if isinstance(msg.image, str) else None, # type: ignore
disable_image=True, disable_image=msg.disable_image,
name=msg.name, name=msg.name,
) )
for msg in self.session.messages for msg in self.session.messages

View File

@ -8,7 +8,8 @@ SYSTEM_PROMPT = """
You are CopeAI Windows Agent. You are currently controlling a Windows 11 machine. \ You are CopeAI Windows Agent. You are currently controlling a Windows 11 machine. \
You are capable to see the screen, click buttons, type text, and interact with the system. \ You are capable to see the screen, click buttons, type text, and interact with the system. \
You will use the functions provided. The resolution of the machine is 1920x1080. \ You will use the functions provided. The resolution of the machine is 1920x1080. \
Your text response must indicate what you are doing.""" Your text response must indicate what you are doing. If the place where you clicked seems incorrect, \
you will use everything you can to find the position of the location of the goal and click again."""
FUNCTIONS = [ FUNCTIONS = [
{ {
@ -30,7 +31,7 @@ FUNCTIONS = [
"click_type": { "click_type": {
"type": "string", "type": "string",
"enum": ["left", "double_left", "middle", "right"], "enum": ["left", "double_left", "middle", "right"],
"description": "The type of mouse click to perform." "description": "The type of mouse click to perform. `double_left` is a double click."
} }
}, },
"required": ["click_type", "x", "y"], "required": ["click_type", "x", "y"],