feat: wip: give OCR+positions
This commit is contained in:
		@@ -51,17 +51,6 @@ class AIProcessor:
 | 
				
			|||||||
                aic.Message(role="user", content=prompt, image=img_data)
 | 
					                aic.Message(role="user", content=prompt, image=img_data)
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
            # if image provided, perform OCR and include text positions
 | 
					            # if image provided, perform OCR and include text positions
 | 
				
			||||||
            if img_data is not None:
 | 
					 | 
				
			||||||
                # decode base64 if needed
 | 
					 | 
				
			||||||
                try:
 | 
					 | 
				
			||||||
                    img_bytes = base64.b64decode(img_data) if isinstance(img_data, str) else img_data
 | 
					 | 
				
			||||||
                    ocr_results = ai.compute.perform_ocr(img_bytes)
 | 
					 | 
				
			||||||
                    # append OCR results as a tool message
 | 
					 | 
				
			||||||
                    self.session.messages.append(
 | 
					 | 
				
			||||||
                        aic.Message(role="tool", name="ocr", content=json.dumps(ocr_results))
 | 
					 | 
				
			||||||
                    )
 | 
					 | 
				
			||||||
                except Exception as e:
 | 
					 | 
				
			||||||
                    logger.debug("OCR failed: %s", e)
 | 
					 | 
				
			||||||
            response = self.oai.chat.completions.create(
 | 
					            response = self.oai.chat.completions.create(
 | 
				
			||||||
                model=self.model,
 | 
					                model=self.model,
 | 
				
			||||||
                messages=self.session.messages_dict(),
 | 
					                messages=self.session.messages_dict(),
 | 
				
			||||||
@@ -113,6 +102,17 @@ class AIProcessor:
 | 
				
			|||||||
                    "Shortened message copies for processing: %s", cps
 | 
					                    "Shortened message copies for processing: %s", cps
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
                if reexec:
 | 
					                if reexec:
 | 
				
			||||||
 | 
					                    img_bytes = ai.compute.take_screenshot(cross_position=click_positions)
 | 
				
			||||||
 | 
					                    img = ai.compute.screenshot_to_base64(
 | 
				
			||||||
 | 
					                        img_bytes
 | 
				
			||||||
 | 
					                    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    ocr_results = []
 | 
				
			||||||
 | 
					                    try:
 | 
				
			||||||
 | 
					                        ocr_results = ai.compute.perform_ocr(img_bytes)
 | 
				
			||||||
 | 
					                    except Exception as e:
 | 
				
			||||||
 | 
					                        logger.debug("OCR failed: %s", e)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    self.session.messages.append(
 | 
					                    self.session.messages.append(
 | 
				
			||||||
                        aic.Message(
 | 
					                        aic.Message(
 | 
				
			||||||
                            role="assistant",
 | 
					                            role="assistant",
 | 
				
			||||||
@@ -120,13 +120,7 @@ class AIProcessor:
 | 
				
			|||||||
                        )
 | 
					                        )
 | 
				
			||||||
                    )
 | 
					                    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    img = ai.compute.screenshot_to_base64(
 | 
					                    outputs.extend( self.process(nextsteps+f"\nOCR Positions: {ocr_results}", img) )
 | 
				
			||||||
                        ai.compute.take_screenshot(cross_position=click_positions)
 | 
					 | 
				
			||||||
                    )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    outputs.extend( self.process(nextsteps, img) )
 | 
					 | 
				
			||||||
                return [
 | 
					                return [
 | 
				
			||||||
                    {
 | 
					                    {
 | 
				
			||||||
                        "name": tc.function.name,
 | 
					                        "name": tc.function.name,
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user