feat: wip: give OCR+positions
This commit is contained in:
parent
5be7f9aadb
commit
105ab4a04b
@ -51,17 +51,6 @@ class AIProcessor:
|
||||
aic.Message(role="user", content=prompt, image=img_data)
|
||||
)
|
||||
# if image provided, perform OCR and include text positions
|
||||
if img_data is not None:
|
||||
# decode base64 if needed
|
||||
try:
|
||||
img_bytes = base64.b64decode(img_data) if isinstance(img_data, str) else img_data
|
||||
ocr_results = ai.compute.perform_ocr(img_bytes)
|
||||
# append OCR results as a tool message
|
||||
self.session.messages.append(
|
||||
aic.Message(role="tool", name="ocr", content=json.dumps(ocr_results))
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("OCR failed: %s", e)
|
||||
response = self.oai.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=self.session.messages_dict(),
|
||||
@ -113,6 +102,17 @@ class AIProcessor:
|
||||
"Shortened message copies for processing: %s", cps
|
||||
)
|
||||
if reexec:
|
||||
img_bytes = ai.compute.take_screenshot(cross_position=click_positions)
|
||||
img = ai.compute.screenshot_to_base64(
|
||||
img_bytes
|
||||
)
|
||||
|
||||
ocr_results = []
|
||||
try:
|
||||
ocr_results = ai.compute.perform_ocr(img_bytes)
|
||||
except Exception as e:
|
||||
logger.debug("OCR failed: %s", e)
|
||||
|
||||
self.session.messages.append(
|
||||
aic.Message(
|
||||
role="assistant",
|
||||
@ -120,13 +120,7 @@ class AIProcessor:
|
||||
)
|
||||
)
|
||||
|
||||
img = ai.compute.screenshot_to_base64(
|
||||
ai.compute.take_screenshot(cross_position=click_positions)
|
||||
)
|
||||
|
||||
|
||||
|
||||
outputs.extend( self.process(nextsteps, img) )
|
||||
outputs.extend( self.process(nextsteps+f"\nOCR Positions: {ocr_results}", img) )
|
||||
return [
|
||||
{
|
||||
"name": tc.function.name,
|
||||
|
Loading…
Reference in New Issue
Block a user