feat(streamed-generation): implement streamed generation and add example

Adds streamed generation support; renames ConversationResponse.response to ConversationResponse.text
This commit is contained in:
2024-01-07 20:19:11 +01:00
parent 28749f0526
commit 507bdf0ad2
5 changed files with 36 additions and 19 deletions

View File

@@ -67,7 +67,7 @@ class Conversation:
self.store = storage
if add_base_prompt and BASE_PROMPT:
self.messages.append({"role": Role.SYSTEM, "content": BASE_PROMPT})
self.messages.append({"role": Role.SYSTEM.value, "content": BASE_PROMPT})
def add_message(self, role: Role, message, username: str | None = None):
if not self.locked:
@@ -97,5 +97,5 @@ class ConversationResponse:
"""A response from the generation. You receive this when the **generation is done**, or non-streamed requests."""
conversation: Conversation
response: str | list[str]
text: str | list[str]
raw_response: list[ChatCompletion] | list[ChatCompletionChunk]

View File

@@ -1,5 +1,6 @@
import json
import traceback
from typing import Any, AsyncGenerator, Coroutine, Generator
import requests
import openai
@@ -40,7 +41,7 @@ async def process_text_streaming(
model: Model,
new_message: str,
additional_args: dict = {},
):
) -> [ConversationResponse, GeneratingResponseChunk]: # FIXME change type
if conversation.locked:
raise ConversationLockedException()
@@ -79,15 +80,12 @@ async def process_text_streaming(
yield GeneratingResponseChunk(partition.content, chunk)
if conversation.interruput:
conversation.add_message(Role.ASSISTANT, text_parts)
yield ConversationResponse(conversation, text_parts, resp_parts)
conversation.locked = False
conversation.add_message(Role.ASSISTANT, text_parts)
conversation.add_message(Role.ASSISTANT, ''.join(text_parts))
yield ConversationResponse(conversation, text_parts, resp_parts)
conversation.locked = False
except Exception as e:
conversation.locked = False
raise e