From 507bdf0ad267e493603e6c5d9d47abd2cbcc6136 Mon Sep 17 00:00:00 2001
From: showdown76py
Date: Sun, 7 Jan 2024 20:19:11 +0100
Subject: [PATCH] feat(streamed-generation): Implemented stream generation & example

Streamed generation, changed ConversationResponse (.response to .text)
---
 .vscode/settings.json           |  5 ++++-
 copeai_backend/conversation.py  |  4 ++--
 copeai_backend/generate.py      | 10 ++++------
 examples/basic-generation.py    |  2 +-
 examples/streamed-generation.py | 34 ++++++++++++++++++++++++---------
 5 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index eea234c..63e79b7 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,7 @@
 {
     "python.analysis.autoImportCompletions": true,
-    "python.analysis.typeCheckingMode": "basic"
+    "python.analysis.typeCheckingMode": "off",
+    "conventionalCommits.scopes": [
+        "streamed-generation"
+    ]
 }
diff --git a/copeai_backend/conversation.py b/copeai_backend/conversation.py
index 70b3018..e8c38f7 100644
--- a/copeai_backend/conversation.py
+++ b/copeai_backend/conversation.py
@@ -67,7 +67,7 @@ class Conversation:
         self.store = storage
 
         if add_base_prompt and BASE_PROMPT:
-            self.messages.append({"role": Role.SYSTEM, "content": BASE_PROMPT})
+            self.messages.append({"role": Role.SYSTEM.value, "content": BASE_PROMPT})
 
     def add_message(self, role: Role, message, username: str | None = None):
         if not self.locked:
@@ -97,5 +97,5 @@ class ConversationResponse:
     """A response from the generation. You receive this when the **generation is done**, or non-streamed requests."""
 
     conversation: Conversation
-    response: str | list[str]
+    text: str | list[str]
     raw_response: list[ChatCompletion] | list[ChatCompletionChunk]
diff --git a/copeai_backend/generate.py b/copeai_backend/generate.py
index 79e8bd5..671cc80 100644
--- a/copeai_backend/generate.py
+++ b/copeai_backend/generate.py
@@ -1,5 +1,6 @@
 import json
 import traceback
+from typing import Any, AsyncGenerator, Coroutine, Generator
 
 import requests
 import openai
@@ -40,7 +41,7 @@ async def process_text_streaming(
     model: Model,
     new_message: str,
     additional_args: dict = {},
-):
+) -> [ConversationResponse, GeneratingResponseChunk]:  # FIXME change type
     if conversation.locked:
         raise ConversationLockedException()
 
@@ -79,15 +80,12 @@ async def process_text_streaming(
                     yield GeneratingResponseChunk(partition.content, chunk)
 
                     if conversation.interruput:
-                        conversation.add_message(Role.ASSISTANT, text_parts)
                         yield ConversationResponse(conversation, text_parts, resp_parts)
                         conversation.locked = False
 
-        conversation.add_message(Role.ASSISTANT, text_parts)
+
+        conversation.add_message(Role.ASSISTANT, ''.join(text_parts))
         yield ConversationResponse(conversation, text_parts, resp_parts)
-
-        conversation.locked = False
-
     except Exception as e:
         conversation.locked = False
         raise e
diff --git a/examples/basic-generation.py b/examples/basic-generation.py
index 18936ec..67ce806 100644
--- a/examples/basic-generation.py
+++ b/examples/basic-generation.py
@@ -23,7 +23,7 @@ async def main():
     )
 
     # Print the response.
-    print(response.response)
+    print(response.text)
 
     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
diff --git a/examples/streamed-generation.py b/examples/streamed-generation.py
index 18936ec..c1a9ac7 100644
--- a/examples/streamed-generation.py
+++ b/examples/streamed-generation.py
@@ -1,5 +1,6 @@
 # fmt: off
 
+from typing import AsyncGenerator
 from copeai_backend import generate, models, conversation
 import asyncio
 
@@ -14,16 +15,24 @@ async def main():
         storage={} # If you need to store some data.
     )
 
-    # Generate a response. This is a non-streamed request, so it will return a ConversationResponse object. This is a blocking call.
-    response: generate.ConversationResponse = await generate.simple_process_text(
-        conversation=conv, # The conversation object.
-        model=models.GPT_3, # The model to use. Add your own models to the MODELS dict in models.py.
+    # Generate a response. This is a streamed request, so it will return a GeneratingResponseChunk object.
+    # Then, at the end of the generation, ConversationResponse will be returned.
+    # This is a non-blocking call.
+    response = generate.process_text_streaming(
+        conversation=conv, # The conversation object.
+        model=models.GPT_3, # The model to use. Add your own models to the MODELS dict in models.py.
         new_message="Hello, how are you?", # The message to send.
-        # additional_args={} # Additional arguments to send to the API. These are different for each API.
-    )
+        # additional_args={} # Additional arguments to send to the API. These are different for each API.
+    ) # type: ignore
 
-    # Print the response.
-    print(response.response)
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+    # To retrieve the response, you can use the ConversationResponse object, from the last iteration.
+    _response = chunk.text
 
     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
@@ -33,10 +42,17 @@ async def main():
     )
 
     # Generate a response.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    response: generate.ConversationResponse = generate.process_text_streaming(
         conversation=conv,
         model=models.GPT_3,
         new_message="...",
     )
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+
 
 asyncio.run(main())
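
Usage sketch (editor's addition, not part of the patch): process_text_streaming is consumed with "async for"; it yields GeneratingResponseChunk objects while tokens arrive and then a final ConversationResponse once generation finishes, as the updated example shows. The snippet below collects the streamed text into a single string. The helper name collect_stream and the bare Conversation(storage={}) construction are illustrative assumptions, and an API key is assumed to be configured for copeai_backend; the exact constructor arguments may differ.

# Sketch only: assumes copeai_backend is importable and configured with an API key.
import asyncio

from copeai_backend import conversation, generate, models


async def collect_stream(conv: conversation.Conversation, message: str) -> str:
    """Stream a reply with process_text_streaming and return the joined text."""
    parts: list[str] = []
    async for item in generate.process_text_streaming(
        conversation=conv,
        model=models.GPT_3,
        new_message=message,
    ):
        if isinstance(item, conversation.GeneratingResponseChunk):
            parts.append(item.text)  # incremental text from one streamed chunk
        # the final item is a ConversationResponse; its .text carries the same parts
    return "".join(parts)


async def main():
    # Other keyword arguments may be required; see the examples for a full setup.
    conv = conversation.Conversation(storage={})
    print(await collect_stream(conv, "Hello, how are you?"))


if __name__ == "__main__":
    asyncio.run(main())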