From 507bdf0ad267e493603e6c5d9d47abd2cbcc6136 Mon Sep 17 00:00:00 2001
From: showdown76py
Date: Sun, 7 Jan 2024 20:19:11 +0100
Subject: [PATCH] feat(streamed-generation): Implemented stream generation & example

Streamed generation, changed ConversationResponse (.response to .text)
---
 .vscode/settings.json           |  5 ++++-
 copeai_backend/conversation.py  |  4 ++--
 copeai_backend/generate.py      | 10 ++++------
 examples/basic-generation.py    |  2 +-
 examples/streamed-generation.py | 34 ++++++++++++++++++++++++---------
 5 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index eea234c..63e79b7 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,7 @@
 {
     "python.analysis.autoImportCompletions": true,
-    "python.analysis.typeCheckingMode": "basic"
+    "python.analysis.typeCheckingMode": "off",
+    "conventionalCommits.scopes": [
+        "streamed-generation"
+    ]
 }
diff --git a/copeai_backend/conversation.py b/copeai_backend/conversation.py
index 70b3018..e8c38f7 100644
--- a/copeai_backend/conversation.py
+++ b/copeai_backend/conversation.py
@@ -67,7 +67,7 @@ class Conversation:
         self.store = storage
 
         if add_base_prompt and BASE_PROMPT:
-            self.messages.append({"role": Role.SYSTEM, "content": BASE_PROMPT})
+            self.messages.append({"role": Role.SYSTEM.value, "content": BASE_PROMPT})
 
     def add_message(self, role: Role, message, username: str | None = None):
         if not self.locked:
@@ -97,5 +97,5 @@ class ConversationResponse:
     """A response from the generation. You receive this when the **generation is done**, or non-streamed requests."""
 
     conversation: Conversation
-    response: str | list[str]
+    text: str | list[str]
     raw_response: list[ChatCompletion] | list[ChatCompletionChunk]
diff --git a/copeai_backend/generate.py b/copeai_backend/generate.py
index 79e8bd5..671cc80 100644
--- a/copeai_backend/generate.py
+++ b/copeai_backend/generate.py
@@ -1,5 +1,6 @@
 import json
 import traceback
+from typing import Any, AsyncGenerator, Coroutine, Generator
 
 import requests
 import openai
@@ -40,7 +41,7 @@ async def process_text_streaming(
     model: Model,
     new_message: str,
     additional_args: dict = {},
-):
+) -> [ConversationResponse, GeneratingResponseChunk]:  # FIXME change type
     if conversation.locked:
         raise ConversationLockedException()
 
@@ -79,15 +80,12 @@ async def process_text_streaming(
                     yield GeneratingResponseChunk(partition.content, chunk)
 
                     if conversation.interruput:
-                        conversation.add_message(Role.ASSISTANT, text_parts)
                         yield ConversationResponse(conversation, text_parts, resp_parts)
                         conversation.locked = False
 
-        conversation.add_message(Role.ASSISTANT, text_parts)
+
+        conversation.add_message(Role.ASSISTANT, ''.join(text_parts))
         yield ConversationResponse(conversation, text_parts, resp_parts)
-
-        conversation.locked = False
-
     except Exception as e:
         conversation.locked = False
         raise e
diff --git a/examples/basic-generation.py b/examples/basic-generation.py
index 18936ec..67ce806 100644
--- a/examples/basic-generation.py
+++ b/examples/basic-generation.py
@@ -23,7 +23,7 @@ async def main():
     )
 
     # Print the response.
-    print(response.response)
+    print(response.text)
 
     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
diff --git a/examples/streamed-generation.py b/examples/streamed-generation.py
index 18936ec..c1a9ac7 100644
--- a/examples/streamed-generation.py
+++ b/examples/streamed-generation.py
@@ -1,5 +1,6 @@
 # fmt: off
 
+from typing import AsyncGenerator
 from copeai_backend import generate, models, conversation
 import asyncio
 
@@ -14,16 +15,24 @@ async def main():
         storage={} # If you need to store some data.
     )
 
-    # Generate a response. This is a non-streamed request, so it will return a ConversationResponse object. This is a blocking call.
-    response: generate.ConversationResponse = await generate.simple_process_text(
-        conversation=conv, # The conversation object.
-        model=models.GPT_3, # The model to use. Add your own models to the MODELS dict in models.py.
+    # Generate a response. This is a streamed request, so it will return a GeneratingResponseChunk object.
+    # Then, at the end of the generation, ConversationResponse will be returned.
+    # This is a non-blocking call.
+    response = generate.process_text_streaming(
+        conversation=conv, # The conversation object.
+        model=models.GPT_3, # The model to use. Add your own models to the MODELS dict in models.py.
         new_message="Hello, how are you?", # The message to send.
-        # additional_args={} # Additional arguments to send to the API. These are different for each API.
-    )
+        # additional_args={} # Additional arguments to send to the API. These are different for each API.
+    ) # type: ignore
 
-    # Print the response.
-    print(response.response)
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+    # To retrieve the response, you can use the ConversationResponse object, from the last iteration.
+    _response = chunk.text
 
     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
@@ -33,10 +42,17 @@ async def main():
     )
 
     # Generate a response.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    response: generate.ConversationResponse = generate.process_text_streaming(
         conversation=conv,
         model=models.GPT_3,
         new_message="...",
     )
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+
 
 asyncio.run(main())
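
Usage sketch (editor's addition, not part of the patch): process_text_streaming is consumed with "async for"; it yields GeneratingResponseChunk objects while tokens arrive and then a final ConversationResponse once generation finishes, as the updated example shows. The snippet below collects the streamed text into a single string. The helper name collect_stream and the bare Conversation(storage={}) construction are illustrative assumptions, and an API key is assumed to be configured for copeai_backend; the exact constructor arguments may differ.

# Sketch only: assumes copeai_backend is importable and configured with an API key.
import asyncio

from copeai_backend import conversation, generate, models


async def collect_stream(conv: conversation.Conversation, message: str) -> str:
    """Stream a reply with process_text_streaming and return the joined text."""
    parts: list[str] = []
    async for item in generate.process_text_streaming(
        conversation=conv,
        model=models.GPT_3,
        new_message=message,
    ):
        if isinstance(item, conversation.GeneratingResponseChunk):
            parts.append(item.text)  # incremental text from one streamed chunk
        # the final item is a ConversationResponse; its .text carries the same parts
    return "".join(parts)


async def main():
    # Other keyword arguments may be required; see the examples for a full setup.
    conv = conversation.Conversation(storage={})
    print(await collect_stream(conv, "Hello, how are you?"))


if __name__ == "__main__":
    asyncio.run(main())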