Add initial implementation of AI agent with mouse and keyboard control features
This commit is contained in:
29
ai/compute.py
Normal file
29
ai/compute.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import pyautogui
|
||||
from objects.inputs import MouseInput, KeyboardInput, ButtonType
|
||||
|
||||
def press_mouse(mouse_input: MouseInput) -> None:
    """Click at the coordinates carried by ``mouse_input``.

    The kind of click is selected by ``mouse_input.click_type``:

    * ``ButtonType.LEFT``        -> single left click
    * ``ButtonType.DOUBLE_LEFT`` -> ``pyautogui.doubleClick``
    * ``ButtonType.RIGHT``       -> single right click
    * ``ButtonType.MIDDLE``      -> single middle click

    Any other ``click_type`` value matches no branch, so the function
    returns without calling ``pyautogui`` at all.
    """
    kind = mouse_input.click_type
    pos_x = mouse_input.x
    pos_y = mouse_input.y

    # Plain equality tests, checked in a fixed order, one early return per
    # recognised button type.
    if kind == ButtonType.LEFT:
        pyautogui.click(pos_x, pos_y, button='left')
        return
    if kind == ButtonType.DOUBLE_LEFT:
        pyautogui.doubleClick(pos_x, pos_y)
        return
    if kind == ButtonType.RIGHT:
        pyautogui.click(pos_x, pos_y, button='right')
        return
    if kind == ButtonType.MIDDLE:
        pyautogui.click(pos_x, pos_y, button='middle')
|
||||
|
||||
def press_keyboard(keyboard_input: KeyboardInput) -> None:
    """Type ``keyboard_input.text`` and optionally press Enter afterwards.

    Nothing is typed when ``text`` is falsy (empty or ``None``). The Enter
    key is pressed whenever ``keyboard_input.press_enter`` is truthy,
    whether or not any text was typed.
    """
    # Read the text attribute once and bind it, so it is not fetched twice.
    if (payload := keyboard_input.text):
        pyautogui.typewrite(payload)

    wants_enter = keyboard_input.press_enter
    if wants_enter:
        pyautogui.press('enter')
|
||||
|
||||
def _execute(name, args):
    """Run the input action identified by ``name`` with keyword data ``args``.

    ``"click_button"`` builds a ``MouseInput`` from ``args`` and passes it to
    ``press_mouse``; ``"type_text"`` builds a ``KeyboardInput`` and passes it
    to ``press_keyboard``. Any other name is ignored and the function
    returns ``None`` without doing anything.
    """
    if name == "click_button":
        mouse_request = MouseInput(**args)
        press_mouse(mouse_request)
        return

    if name == "type_text":
        keyboard_request = KeyboardInput(**args)
        press_keyboard(keyboard_request)
|
73
ai/processor.py
Normal file
73
ai/processor.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import traceback
|
||||
import json # new
|
||||
import openai
|
||||
from objects import aic
|
||||
import ai.compute
|
||||
|
||||
class AIProcessor:
|
||||
def __init__(self, api_key: str, model: str = "gpt-4.1"):
|
||||
self.oai = openai.Client(api_key=api_key)
|
||||
self.model = model
|
||||
self.session = aic.Session(messages=[aic.Message(role="system", content=aic.SYSTEM_PROMPT)], model=model) # type: ignore
|
||||
self._tools_map = { # local binding of python callables
|
||||
"click_button": self._click_button,
|
||||
"type_text": self._type_text,
|
||||
}
|
||||
|
||||
# --------------------- tool implementations --------------------- #
|
||||
def _click_button(self, x: int, y: int, click_type: str) -> str:
|
||||
# TODO: integrate real mouse automation.
|
||||
return f"Performed {click_type} click at ({x}, {y})."
|
||||
|
||||
def _type_text(self, text: str) -> str:
|
||||
# TODO: integrate real typing automation.
|
||||
return f'Typed text: "{text}"'
|
||||
|
||||
def _execute_tool(self, name: str, arguments: dict) -> str:
|
||||
func = self._tools_map.get(name)
|
||||
if not func:
|
||||
return f"Unknown tool: {name}"
|
||||
try:
|
||||
return func(**arguments)
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
return f"Error executing {name}: {e}"
|
||||
|
||||
# -------------------------- main entry -------------------------- #
|
||||
def process(self, prompt: str, img_data: str | bytes | None = None) -> str | list[dict]:
|
||||
try:
|
||||
self.session.messages.append(
|
||||
aic.Message(role="user", content=prompt, image=img_data)
|
||||
)
|
||||
response = self.oai.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=self.session.messages_dict(),
|
||||
tools=aic.FUNCTIONS, # type: ignore
|
||||
)
|
||||
|
||||
# return tool call requests if any
|
||||
tool_calls = getattr(response.choices[0].message, "tool_calls", None)
|
||||
if tool_calls:
|
||||
for tc in tool_calls:
|
||||
ai.compute._execute(
|
||||
name=tc.function.name,
|
||||
args=json.loads(tc.function.arguments)
|
||||
)
|
||||
return [
|
||||
{
|
||||
"name": tc.function.name,
|
||||
"arguments": json.loads(tc.function.arguments),
|
||||
}
|
||||
for tc in tool_calls
|
||||
]
|
||||
|
||||
# otherwise return final assistant content
|
||||
print(f"Response: {json.dumps(response.to_dict(), indent=4)}") # debug
|
||||
output_text: str = response.choices[0].message.content # type: ignore
|
||||
self.session.messages.append(
|
||||
aic.Message(role="assistant", content=output_text)
|
||||
)
|
||||
return output_text
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
return f"Error processing request: {str(e)}"
|
Reference in New Issue
Block a user