feat(streamed-generation): implement streamed generation & example

Add streamed generation; rename ConversationResponse.response to ConversationResponse.text.
parent 28749f0526
commit 507bdf0ad2
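In short: `generate.process_text_streaming` is an async generator that yields `GeneratingResponseChunk` objects while tokens arrive, then a final `ConversationResponse` when generation ends. A minimal consumer sketch, assuming the `copeai_backend` imports used in the examples below (`conv` stands in for an already-built Conversation):

    from copeai_backend import generate, models, conversation

    async def consume(conv):
        stream = generate.process_text_streaming(
            conversation=conv,
            model=models.GPT_3,
            new_message="Hello!",
        )
        async for chunk in stream:
            if isinstance(chunk, conversation.GeneratingResponseChunk):
                print(chunk.text, end="")  # partial output as it arrives
            else:
                return chunk.text          # the final ConversationResponse.text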
.vscode/settings.json (vendored): 5 lines changed
@@ -1,4 +1,7 @@
 {
     "python.analysis.autoImportCompletions": true,
-    "python.analysis.typeCheckingMode": "basic"
+    "python.analysis.typeCheckingMode": "off",
+    "conventionalCommits.scopes": [
+        "streamed-generation"
+    ]
 }
@@ -67,7 +67,7 @@ class Conversation:
         self.store = storage
 
         if add_base_prompt and BASE_PROMPT:
-            self.messages.append({"role": Role.SYSTEM, "content": BASE_PROMPT})
+            self.messages.append({"role": Role.SYSTEM.value, "content": BASE_PROMPT})
 
     def add_message(self, role: Role, message, username: str | None = None):
         if not self.locked:
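Context for the `.value` change: assuming `Role` is a standard `enum.Enum` (its definition is not part of this diff), the raw member would put a non-string into the message payload, while `.value` yields the plain string the chat API expects. A hypothetical illustration:

    from enum import Enum

    class Role(Enum):            # assumed shape; not taken from this commit
        SYSTEM = "system"
        ASSISTANT = "assistant"

    bad = {"role": Role.SYSTEM, "content": "..."}         # enum member, not JSON-serializable
    good = {"role": Role.SYSTEM.value, "content": "..."}  # {"role": "system", "content": "..."}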
@@ -97,5 +97,5 @@ class ConversationResponse:
     """A response from the generation. You receive this when the **generation is done**, or non-streamed requests."""
 
     conversation: Conversation
-    response: str | list[str]
+    text: str | list[str]
     raw_response: list[ChatCompletion] | list[ChatCompletionChunk]
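The rename is a breaking change for callers: anything reading `.response` on a ConversationResponse must now read `.text`, mirroring the non-streamed example updated below:

    resp = await generate.simple_process_text(conversation=conv, model=models.GPT_3, new_message="Hi")
    print(resp.text)  # previously: print(resp.response)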
@@ -1,5 +1,6 @@
 import json
 import traceback
+from typing import Any, AsyncGenerator, Coroutine, Generator
 import requests
 import openai
 
@@ -40,7 +41,7 @@ async def process_text_streaming(
     model: Model,
     new_message: str,
     additional_args: dict = {},
-):
+) -> [ConversationResponse, GeneratingResponseChunk]:  # FIXME change type
     if conversation.locked:
         raise ConversationLockedException()
 
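The bracketed annotation is a placeholder (hence the FIXME): a list literal is not a valid type. Since the function is an async generator, one way to spell it, using the `typing.AsyncGenerator` import added earlier in this commit, would be (a suggestion, not part of the commit):

    async def process_text_streaming(
        conversation: Conversation,
        model: Model,
        new_message: str,
        additional_args: dict = {},
    ) -> AsyncGenerator[GeneratingResponseChunk | ConversationResponse, None]:
        ...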
@@ -79,15 +80,12 @@ async def process_text_streaming(
                 yield GeneratingResponseChunk(partition.content, chunk)
 
             if conversation.interruput:
-                conversation.add_message(Role.ASSISTANT, text_parts)
                 yield ConversationResponse(conversation, text_parts, resp_parts)
 
         conversation.locked = False
-        conversation.add_message(Role.ASSISTANT, text_parts)
+        conversation.add_message(Role.ASSISTANT, ''.join(text_parts))
         yield ConversationResponse(conversation, text_parts, resp_parts)
 
-        conversation.locked = False
-
     except Exception as e:
         conversation.locked = False
         raise e
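Why the `''.join(...)` matters: `text_parts` accumulates one string fragment per streamed chunk, so the history entry has to be flattened back into a single string before it is stored (the earlier code stored the raw list, and did so twice). Roughly:

    text_parts = ["Hel", "lo", "!"]   # one fragment per streamed chunk
    # old: add_message(Role.ASSISTANT, text_parts)  -> stored a list in the history
    conversation.add_message(Role.ASSISTANT, ''.join(text_parts))  # stores "Hello!"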
@@ -23,7 +23,7 @@ async def main():
     )
 
     # Print the response.
-    print(response.response)
+    print(response.text)
 
     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
@@ -1,5 +1,6 @@
 # fmt: off
 
+from typing import AsyncGenerator
 from copeai_backend import generate, models, conversation
 import asyncio
 
@@ -14,16 +15,24 @@ async def main():
         storage={} # If you need to store some data.
     )
 
-    # Generate a response. This is a non-streamed request, so it will return a ConversationResponse object. This is a blocking call.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    # Generate a response. This is a streamed request, so it will return a GeneratingResponseChunk object.
+    # Then, at the end of the generation, ConversationResponse will be returned.
+    # This is a non-blocking call.
+    response = generate.process_text_streaming(
         conversation=conv, # The conversation object.
         model=models.GPT_3, # The model to use. Add your own models to the MODELS dict in models.py.
         new_message="Hello, how are you?", # The message to send.
         # additional_args={} # Additional arguments to send to the API. These are different for each API.
-    )
+    ) # type: ignore
 
-    # Print the response.
-    print(response.response)
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+    # To retrieve the response, you can use the ConversationResponse object, from the last iteration.
+    _response = chunk.text
 
     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
@@ -33,10 +42,17 @@ async def main():
     )
 
     # Generate a response.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    response: generate.ConversationResponse = generate.process_text_streaming(
         conversation=conv,
         model=models.GPT_3,
         new_message="...",
     )
 
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+
 asyncio.run(main())
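Because the closing ConversationResponse is simply the last item the generator yields, a caller that wants both live output and the final object can wrap the loop in a helper. A sketch under the same assumptions as the examples above (`collect_stream` is a hypothetical name, not part of this commit):

    async def collect_stream(stream):
        final = None
        async for chunk in stream:
            if isinstance(chunk, conversation.GeneratingResponseChunk):
                print(chunk.text, end="")  # live partial output
            else:
                final = chunk              # the terminal ConversationResponse
        return final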