feat(streamed-generation): Implemented stream generation & example

Streamed generation, changed ConversationResponse (.response to .text)
2024-01-07 20:19:11 +01:00
parent 28749f0526
commit 507bdf0ad2
5 changed files with 36 additions and 19 deletions
@@ -1,4 +1,7 @@
 {
    "python.analysis.autoImportCompletions": true,
-    "python.analysis.typeCheckingMode": "basic"
+    "python.analysis.typeCheckingMode": "off",
+    "conventionalCommits.scopes": [
+        "streamed-generation"
+    ]
 }
@@ -67,7 +67,7 @@ class Conversation:
        self.store = storage

        if add_base_prompt and BASE_PROMPT:
-            self.messages.append({"role": Role.SYSTEM, "content": BASE_PROMPT})
+            self.messages.append({"role": Role.SYSTEM.value, "content": BASE_PROMPT})

    def add_message(self, role: Role, message, username: str | None = None):
        if not self.locked:
@@ -97,5 +97,5 @@ class ConversationResponse:
    """A response from the generation. You receive this when the **generation is done**, or for non-streamed requests."""

    conversation: Conversation
-    response: str | list[str]
+    text: str | list[str]
    raw_response: list[ChatCompletion] | list[ChatCompletionChunk]
@@ -1,5 +1,6 @@
 import json
 import traceback
+from typing import Any, AsyncGenerator, Coroutine, Generator
 import requests
 import openai

@@ -40,7 +41,7 @@ async def process_text_streaming(
    model: Model,
    new_message: str,
    additional_args: dict = {},
-):
+) -> [ConversationResponse, GeneratingResponseChunk]: # FIXME change type
    if conversation.locked:
        raise ConversationLockedException()

@@ -79,15 +80,12 @@ async def process_text_streaming(
                        yield GeneratingResponseChunk(partition.content, chunk)

                    if conversation.interruput:
-                        conversation.add_message(Role.ASSISTANT, text_parts)
                        yield ConversationResponse(conversation, text_parts, resp_parts)

        conversation.locked = False
-        conversation.add_message(Role.ASSISTANT, text_parts)
+
+        conversation.add_message(Role.ASSISTANT, ''.join(text_parts))
        yield ConversationResponse(conversation, text_parts, resp_parts)
-
-        conversation.locked = False
-
    except Exception as e:
        conversation.locked = False
        raise e
@@ -23,7 +23,7 @@ async def main():
    )

    # Print the response.
-    print(response.response)
+    print(response.text)

    # The assistant's message is automatically added to the conversation object.
    # Add a new user message.
@@ -1,5 +1,6 @@
 # fmt: off

+from typing import AsyncGenerator
 from copeai_backend import generate, models, conversation
 import asyncio

@@ -14,16 +15,24 @@ async def main():
        storage={}                          # If you need to store some data.
    )

-    # Generate a response. This is a non-streamed request, so it will return a ConversationResponse object. This is a blocking call.
-    response: generate.ConversationResponse = await generate.simple_process_text(
-        conversation=conv,          # The conversation object.
-        model=models.GPT_3,     # The model to use. Add your own models to the MODELS dict in models.py.
+    # Generate a response. This is a streamed request, so it yields GeneratingResponseChunk objects as text is generated.
+    # Then, at the end of the generation, a final ConversationResponse is yielded.
+    # The call itself does not block: it returns an async generator that must be consumed with `async for`.
+    response = generate.process_text_streaming(
+        conversation=conv,                  # The conversation object.
+        model=models.GPT_3,                 # The model to use. Add your own models to the MODELS dict in models.py.
        new_message="Hello, how are you?",  # The message to send.
-        # additional_args={}                  # Additional arguments to send to the API. These are different for each API.
-    )
+        # additional_args={}                # Additional arguments to send to the API. These are different for each API.
+    ) # type: ignore

-    # Print the response.
-    print(response.response)
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+        
+    # After the loop, `chunk` holds the last yielded item, the final ConversationResponse; use it to retrieve the response.
+    _response = chunk.text

    # The assistant's message is automatically added to the conversation object.
    # Add a new user message.
@@ -33,10 +42,17 @@ async def main():
    )

    # Generate a response.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    response: generate.ConversationResponse = generate.process_text_streaming(
        conversation=conv,
        model=models.GPT_3,
        new_message="...",
    )

+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+
 asyncio.run(main())