feat(streamed-generation): Implemented stream generation & example

Adds streamed generation; renames ConversationResponse.response to ConversationResponse.text.
Showdown76 2024-01-07 20:19:11 +01:00
parent 28749f0526
commit 507bdf0ad2
5 changed files with 36 additions and 19 deletions
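For context when reading the diffs below: the commit turns generation into an async generator that yields chunk objects while tokens arrive and a final response object at the end, and it renames the response field. A minimal consumption sketch, assuming the copeai_backend layout shown in the changed files (constructor arguments abbreviated; not authoritative):

    import asyncio
    from copeai_backend import generate, models, conversation

    async def demo():
        conv = conversation.Conversation(storage={})  # assumed constructor defaults
        stream = generate.process_text_streaming(
            conversation=conv,
            model=models.GPT_3,
            new_message="Hello, how are you?",
        )
        async for chunk in stream:
            if isinstance(chunk, conversation.GeneratingResponseChunk):
                print(chunk.text, end="")  # intermediate streamed chunk
            else:
                final = chunk              # final ConversationResponse; read .text, not .response
                print()

    asyncio.run(demo())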

View File

@@ -1,4 +1,7 @@
 {
     "python.analysis.autoImportCompletions": true,
-    "python.analysis.typeCheckingMode": "basic"
+    "python.analysis.typeCheckingMode": "off",
+    "conventionalCommits.scopes": [
+        "streamed-generation"
+    ]
 }

View File

@@ -67,7 +67,7 @@ class Conversation:
         self.store = storage

         if add_base_prompt and BASE_PROMPT:
-            self.messages.append({"role": Role.SYSTEM, "content": BASE_PROMPT})
+            self.messages.append({"role": Role.SYSTEM.value, "content": BASE_PROMPT})

     def add_message(self, role: Role, message, username: str | None = None):
         if not self.locked:
@@ -97,5 +97,5 @@ class ConversationResponse:
     """A response from the generation. You receive this when the **generation is done**, or non-streamed requests."""

     conversation: Conversation
-    response: str | list[str]
+    text: str | list[str]
     raw_response: list[ChatCompletion] | list[ChatCompletionChunk]
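Because the field is renamed rather than aliased, this is a breaking change for callers: any code reading the old attribute must be updated. A one-line migration sketch (resp stands for any ConversationResponse instance; the name is illustrative):

    # before this commit: print(resp.response)
    print(resp.text)  # str | list[str], per the annotation above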

View File

@@ -1,5 +1,6 @@
 import json
 import traceback
+from typing import Any, AsyncGenerator, Coroutine, Generator
 import requests
 import openai
@@ -40,7 +41,7 @@ async def process_text_streaming(
     model: Model,
     new_message: str,
     additional_args: dict = {},
-):
+) -> [ConversationResponse, GeneratingResponseChunk]:  # FIXME change type
     if conversation.locked:
         raise ConversationLockedException()
@@ -79,15 +80,12 @@ async def process_text_streaming(
                 yield GeneratingResponseChunk(partition.content, chunk)

             if conversation.interruput:
-                conversation.add_message(Role.ASSISTANT, text_parts)
-                yield ConversationResponse(conversation, text_parts, resp_parts)
-                conversation.locked = False
-
-        conversation.add_message(Role.ASSISTANT, text_parts)
         yield ConversationResponse(conversation, text_parts, resp_parts)
         conversation.locked = False
+        conversation.add_message(Role.ASSISTANT, ''.join(text_parts))
+        yield ConversationResponse(conversation, text_parts, resp_parts)
     except Exception as e:
         conversation.locked = False
         raise e
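The annotation added above, [ConversationResponse, GeneratingResponseChunk], is a list literal rather than a valid type, which is presumably why it carries a FIXME. A hedged sketch of what the annotation could become, using the AsyncGenerator import this commit adds (the exact intended type is an assumption, not part of the commit):

    from typing import AsyncGenerator

    # Hypothetical signature only; the real parameters live in copeai_backend.generate.
    async def process_text_streaming(
        conversation, model, new_message: str, additional_args: dict = {},
    ) -> AsyncGenerator["GeneratingResponseChunk | ConversationResponse", None]:
        ...

Read as: an async generator that yields intermediate GeneratingResponseChunk objects and, as its last item, a ConversationResponse.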

View File

@@ -23,7 +23,7 @@ async def main():
     )

     # Print the response.
-    print(response.response)
+    print(response.text)

     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.

View File

@@ -1,5 +1,6 @@
 # fmt: off
+from typing import AsyncGenerator
 from copeai_backend import generate, models, conversation
 import asyncio
@@ -14,16 +15,24 @@ async def main():
         storage={}  # If you need to store some data.
     )

-    # Generate a response. This is a non-streamed request, so it will return a ConversationResponse object. This is a blocking call.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    # Generate a response. This is a streamed request, so it will return a GeneratingResponseChunk object.
+    # Then, at the end of the generation, ConversationResponse will be returned.
+    # This is a non-blocking call.
+    response = generate.process_text_streaming(
         conversation=conv,  # The conversation object.
         model=models.GPT_3,  # The model to use. Add your own models to the MODELS dict in models.py.
         new_message="Hello, how are you?",  # The message to send.
         # additional_args={}  # Additional arguments to send to the API. These are different for each API.
-    )
-
-    # Print the response.
-    print(response.response)
+    )  # type: ignore
+
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+            # To retrieve the response, you can use the ConversationResponse object, from the last iteration.
+            _response = chunk.text

     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
@@ -33,10 +42,17 @@ async def main():
     )

     # Generate a response.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    response: generate.ConversationResponse = generate.process_text_streaming(
         conversation=conv,
         model=models.GPT_3,
         new_message="...",
     )
+
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')

 asyncio.run(main())