Add initial implementation of AI agent with mouse and keyboard control features
This commit is contained in:
		
							
								
								
									
										97
									
								
								objects/aic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								objects/aic.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,97 @@
 | 
			
		||||
from dataclasses import dataclass
 | 
			
		||||
from typing import Optional, Union
 | 
			
		||||
from typing_extensions import Literal
 | 
			
		||||
 | 
			
		||||
# Model identifiers listed as compatible (how this tuple is consumed is not
# visible in this module).
compatible_models = (
    "gpt-4.1",
    "gpt-4.1-mini",
    "o4-mini",
    "gpt-4.1-nano",
)

# System prompt text. The value starts with a newline and is otherwise one
# single line: the sentences below are joined by single spaces.
SYSTEM_PROMPT = (
    "\n"
    "You are CopeAI Windows Agent. You are currently controlling a Windows 11 machine. "
    "You are capable to see the screen, click buttons, type text, and interact with the system. "
    "You will use the functions provided. The resolution of the machine is 1920x1080. "
    "Your text response must indicate what you are doing."
)
 | 
			
		||||
 | 
			
		||||
def _function_tool(name, description, properties, required):
    """Wrap one function definition in the ``{"type": "function", ...}`` envelope.

    ``properties`` maps parameter names to their JSON-schema fragments and
    ``required`` lists the parameter names that must be supplied.
    """
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


# Function-calling schemas for the two actions declared here: clicking at a
# screen position and typing text.
FUNCTIONS = [
    _function_tool(
        "click_button",
        "Click a button at the specified (x, y) position with the given click type.",
        {
            "x": {
                "type": "integer",
                "description": "The X coordinate of the button.",
            },
            "y": {
                "type": "integer",
                "description": "The Y coordinate of the button.",
            },
            "click_type": {
                "type": "string",
                "enum": ["left", "double_left", "middle", "right"],
                "description": "The type of mouse click to perform.",
            },
        },
        ["click_type", "x", "y"],
    ),
    _function_tool(
        "type_text",
        "Type the given text at the current cursor location.",
        {
            "text": {
                "type": "string",
                "description": "The text to type.",
            },
            "press_enter": {
                "type": "boolean",
                "default": False,
                "description": "Whether to press Enter after typing the text.",
            },
        },
        ["text", "press_enter"],
    ),
]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclass
class Message:
    """A single chat message that can be serialised with ``to_dict``."""

    # Who produced the message; "tool" marks a tool/function result.
    role: Literal['user', 'assistant', 'system', 'tool']
    # Text of the message.
    content: str
    # Optional image. A str is interpolated as-is into a base64 PNG data URL;
    # bytes are base64-encoded first (see to_dict).
    image: Optional[Union[str, bytes]] = None
    # When True, the image is left out of to_dict() even if one is set.
    disable_image: bool = False
    # Tool name; only emitted for messages whose role is "tool".
    name: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the message as a plain dict.

        ``content`` is the bare text unless an image is set and
        ``disable_image`` is False; in that case it is a two-part list
        (a text part followed by an ``image_url`` part carrying a
        ``data:image/png;base64,...`` URL). ``name`` is added only for
        tool messages that have one.
        """
        # Local import keeps this block self-contained without touching the
        # module's import section.
        import base64

        if self.image and not self.disable_image:
            if isinstance(self.image, bytes):
                # NOTE(review): bytes are assumed to be raw image data and are
                # base64-encoded here; confirm no caller passes bytes that are
                # already base64-encoded.
                encoded = base64.b64encode(self.image).decode("ascii")
            else:
                encoded = self.image
            content = [
                {"type": "text", "text": self.content},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{encoded}"},
                },
            ]
        else:
            # No image, or the image is explicitly disabled: send text only.
            content = self.content

        payload = {"role": self.role, "content": content}
        if self.role == "tool" and self.name:
            payload["name"] = self.name
        return payload
 | 
			
		||||
 | 
			
		||||
@dataclass
class Session:
    """An ordered list of messages together with a model name."""

    messages: list[Message]
    model: str = "gpt-4.1"

    def messages_dict(self) -> list:
        """Return the ``to_dict()`` form of every message, in order."""
        serialized = []
        for entry in self.messages:
            serialized.append(entry.to_dict())
        return serialized

    def to_dict(self) -> dict:
        """Return a dict with the serialised ``messages`` and the ``model``."""
        payload = {"messages": self.messages_dict()}
        payload["model"] = self.model
        return payload
 | 
			
		||||
							
								
								
									
										20
									
								
								objects/inputs.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								objects/inputs.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,20 @@
 | 
			
		||||
from dataclasses import dataclass
 | 
			
		||||
from enum import Enum
 | 
			
		||||
 | 
			
		||||
class ButtonType(Enum):
    """Mouse click kinds, each carrying its lowercase string value."""

    # Single-button clicks.
    LEFT = "left"
    # Two left clicks in a row.
    DOUBLE_LEFT = "double_left"
    RIGHT = "right"
    MIDDLE = "middle"
 | 
			
		||||
 | 
			
		||||
@dataclass
class KeyboardInput:
    """Text to type, plus a flag for pressing Enter afterwards."""

    # The text to type.
    text: str
    # Whether Enter should be pressed after the text; off by default.
    press_enter: bool = False
 | 
			
		||||
 | 
			
		||||
@dataclass
class MouseInput:
    """A mouse click: a screen position and the kind of click to perform."""

    # X coordinate of the click.
    x: int
    # Y coordinate of the click.
    y: int
    # Kind of click. Annotated as a single ButtonType (previously
    # list[ButtonType]) because the click_button tool schema defines
    # click_type as one value from the same four options. Dataclasses do not
    # enforce annotations, so existing callers are unaffected at runtime.
    click_type: ButtonType
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user