feat(streamed-generation): Implemented stream generation & example

Streamed generation; changed ConversationResponse (.response to .text)
parent 28749f0526
commit 507bdf0ad2
.vscode/settings.json (vendored): 5 lines changed
@@ -1,4 +1,7 @@
 {
     "python.analysis.autoImportCompletions": true,
-    "python.analysis.typeCheckingMode": "basic"
+    "python.analysis.typeCheckingMode": "off",
+    "conventionalCommits.scopes": [
+        "streamed-generation"
+    ]
 }
@@ -67,7 +67,7 @@ class Conversation:
         self.store = storage
 
         if add_base_prompt and BASE_PROMPT:
-            self.messages.append({"role": Role.SYSTEM, "content": BASE_PROMPT})
+            self.messages.append({"role": Role.SYSTEM.value, "content": BASE_PROMPT})
 
     def add_message(self, role: Role, message, username: str | None = None):
         if not self.locked:
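The switch to Role.SYSTEM.value most likely matters because plain Enum members are not JSON-serializable, so the raw member would break the request payload. A minimal sketch of the difference, assuming Role is a plain Enum (the Role name is the repo's; the rest is illustrative):

import json
from enum import Enum

class Role(Enum):
    SYSTEM = "system"

print(json.dumps({"role": Role.SYSTEM.value}))  # ok: {"role": "system"}
# json.dumps({"role": Role.SYSTEM})             # raises TypeError: not JSON serializable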
@@ -97,5 +97,5 @@ class ConversationResponse:
     """A response from the generation. You receive this when the **generation is done**, or non-streamed requests."""
 
     conversation: Conversation
-    response: str | list[str]
+    text: str | list[str]
     raw_response: list[ChatCompletion] | list[ChatCompletionChunk]
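For existing callers the rename is mechanical. A hypothetical before/after, assuming a ConversationResponse named resp is already in hand:

# resp.response  # old field name, removed by this commit
resp.text        # new field name, same str | list[str] payload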
@@ -1,5 +1,6 @@
 import json
 import traceback
+from typing import Any, AsyncGenerator, Coroutine, Generator
 import requests
 import openai
 
@@ -40,7 +41,7 @@ async def process_text_streaming(
     model: Model,
     new_message: str,
     additional_args: dict = {},
-):
+) -> [ConversationResponse, GeneratingResponseChunk]:  # FIXME change type
     if conversation.locked:
         raise ConversationLockedException()
 
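The list literal in the new annotation is not a valid type expression, which is presumably what the FIXME refers to. One way it could be resolved, sketched with stand-in classes (the real ones live elsewhere in the repo):

from typing import AsyncGenerator

class GeneratingResponseChunk: ...
class ConversationResponse: ...

async def process_text_streaming() -> AsyncGenerator[GeneratingResponseChunk | ConversationResponse, None]:
    # Yield intermediate chunks while generating, then the final response,
    # matching the protocol the example below consumes.
    yield GeneratingResponseChunk()
    yield ConversationResponse()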
@@ -79,15 +80,12 @@ async def process_text_streaming(
                     yield GeneratingResponseChunk(partition.content, chunk)
 
-                if conversation.interruput:
-                    conversation.add_message(Role.ASSISTANT, text_parts)
-                    yield ConversationResponse(conversation, text_parts, resp_parts)
 
                 conversation.locked = False
-            conversation.add_message(Role.ASSISTANT, text_parts)
-            yield ConversationResponse(conversation, text_parts, resp_parts)
 
         conversation.locked = False
 
+        conversation.add_message(Role.ASSISTANT, ''.join(text_parts))
+        yield ConversationResponse(conversation, text_parts, resp_parts)
     except Exception as e:
         conversation.locked = False
         raise e
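The hunk unlocks the conversation on both the success path and the error path. A minimal sketch (an alternative pattern, not the author's code) of how try/finally would collapse the two unlock sites into one:

class Conversation:
    locked: bool = False

def run_locked(conversation: Conversation, work):
    conversation.locked = True
    try:
        return work()                # generation happens here
    finally:
        conversation.locked = False  # runs on return and on raise alike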
@@ -23,7 +23,7 @@ async def main():
     )
 
     # Print the response.
-    print(response.response)
+    print(response.text)
 
     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
@@ -1,5 +1,6 @@
 # fmt: off
 
+from typing import AsyncGenerator
 from copeai_backend import generate, models, conversation
 import asyncio
@@ -14,16 +15,24 @@ async def main():
         storage={}  # If you need to store some data.
     )
 
-    # Generate a response. This is a non-streamed request, so it will return a ConversationResponse object. This is a blocking call.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    # Generate a response. This is a streamed request, so it will return a GeneratingResponseChunk object.
+    # Then, at the end of the generation, ConversationResponse will be returned.
+    # This is a non-blocking call.
+    response = generate.process_text_streaming(
         conversation=conv,  # The conversation object.
         model=models.GPT_3,  # The model to use. Add your own models to the MODELS dict in models.py.
         new_message="Hello, how are you?",  # The message to send.
         # additional_args={}  # Additional arguments to send to the API. These are different for each API.
-    )
+    )  # type: ignore
 
-    # Print the response.
-    print(response.response)
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+    # To retrieve the response, you can use the ConversationResponse object, from the last iteration.
+    _response = chunk.text
 
     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
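A side note on why the "_response = chunk.text" line works: Python loop variables outlive the loop, so after iteration chunk still refers to the last yielded object, here the final ConversationResponse. In isolation:

for chunk in (1, 2, 3):
    pass
print(chunk)  # 3: the loop variable persists after the loop ends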
@@ -33,10 +42,17 @@ async def main():
     )
 
     # Generate a response.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    response: generate.ConversationResponse = generate.process_text_streaming(
         conversation=conv,
         model=models.GPT_3,
         new_message="...",
     )
 
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+
 asyncio.run(main())