feat(streamed-generation): implement streamed generation & example

Add streamed generation; rename ConversationResponse.response to ConversationResponse.text.
parent 28749f0526
commit 507bdf0ad2
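In short: `generate.process_text_streaming` is an async generator that yields `GeneratingResponseChunk` objects while tokens arrive, then a final `ConversationResponse` when generation ends. A minimal consumer sketch, assuming the `copeai_backend` imports used in the examples below (`conv` stands in for an already-built Conversation):

    from copeai_backend import generate, models, conversation

    async def consume(conv):
        stream = generate.process_text_streaming(
            conversation=conv,
            model=models.GPT_3,
            new_message="Hello!",
        )
        async for chunk in stream:
            if isinstance(chunk, conversation.GeneratingResponseChunk):
                print(chunk.text, end="")  # partial output as it arrives
            else:
                return chunk.text          # the final ConversationResponse.text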
.vscode/settings.json (vendored): 5 lines changed
@@ -1,4 +1,7 @@
 {
     "python.analysis.autoImportCompletions": true,
-    "python.analysis.typeCheckingMode": "basic"
+    "python.analysis.typeCheckingMode": "off",
+    "conventionalCommits.scopes": [
+        "streamed-generation"
+    ]
 }
@@ -67,7 +67,7 @@ class Conversation:
         self.store = storage
 
         if add_base_prompt and BASE_PROMPT:
-            self.messages.append({"role": Role.SYSTEM, "content": BASE_PROMPT})
+            self.messages.append({"role": Role.SYSTEM.value, "content": BASE_PROMPT})
 
     def add_message(self, role: Role, message, username: str | None = None):
         if not self.locked:
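Context for the `.value` change: assuming `Role` is a standard `enum.Enum` (its definition is not part of this diff), the raw member would put a non-string into the message payload, while `.value` yields the plain string the chat API expects. A hypothetical illustration:

    from enum import Enum

    class Role(Enum):            # assumed shape; not taken from this commit
        SYSTEM = "system"
        ASSISTANT = "assistant"

    bad = {"role": Role.SYSTEM, "content": "..."}         # enum member, not JSON-serializable
    good = {"role": Role.SYSTEM.value, "content": "..."}  # {"role": "system", "content": "..."}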
@@ -97,5 +97,5 @@ class ConversationResponse:
     """A response from the generation. You receive this when the **generation is done**, or non-streamed requests."""
 
     conversation: Conversation
-    response: str | list[str]
+    text: str | list[str]
     raw_response: list[ChatCompletion] | list[ChatCompletionChunk]
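The rename is a breaking change for callers: anything reading `.response` on a ConversationResponse must now read `.text`, mirroring the non-streamed example updated below:

    resp = await generate.simple_process_text(conversation=conv, model=models.GPT_3, new_message="Hi")
    print(resp.text)  # previously: print(resp.response)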
@@ -1,5 +1,6 @@
 import json
 import traceback
+from typing import Any, AsyncGenerator, Coroutine, Generator
 import requests
 import openai
 
@@ -40,7 +41,7 @@ async def process_text_streaming(
     model: Model,
     new_message: str,
     additional_args: dict = {},
-):
+) -> [ConversationResponse, GeneratingResponseChunk]:  # FIXME change type
     if conversation.locked:
         raise ConversationLockedException()
 
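The bracketed annotation is a placeholder (hence the FIXME): a list literal is not a valid type. Since the function is an async generator, one way to spell it, using the `typing.AsyncGenerator` import added earlier in this commit, would be (a suggestion, not part of the commit):

    async def process_text_streaming(
        conversation: Conversation,
        model: Model,
        new_message: str,
        additional_args: dict = {},
    ) -> AsyncGenerator[GeneratingResponseChunk | ConversationResponse, None]:
        ...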
@@ -79,15 +80,12 @@ async def process_text_streaming(
                 yield GeneratingResponseChunk(partition.content, chunk)
 
             if conversation.interruput:
-                conversation.add_message(Role.ASSISTANT, text_parts)
                 yield ConversationResponse(conversation, text_parts, resp_parts)
 
         conversation.locked = False
-        conversation.add_message(Role.ASSISTANT, text_parts)
+        conversation.add_message(Role.ASSISTANT, ''.join(text_parts))
         yield ConversationResponse(conversation, text_parts, resp_parts)
 
-        conversation.locked = False
-
     except Exception as e:
         conversation.locked = False
         raise e
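Why the `''.join(...)` matters: `text_parts` accumulates one string fragment per streamed chunk, so the history entry has to be flattened back into a single string before it is stored (the earlier code stored the raw list, and did so twice). Roughly:

    text_parts = ["Hel", "lo", "!"]   # one fragment per streamed chunk
    # old: add_message(Role.ASSISTANT, text_parts)  -> stored a list in the history
    conversation.add_message(Role.ASSISTANT, ''.join(text_parts))  # stores "Hello!"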
@@ -23,7 +23,7 @@ async def main():
     )
 
     # Print the response.
-    print(response.response)
+    print(response.text)
 
     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
@@ -1,5 +1,6 @@
 # fmt: off
 
+from typing import AsyncGenerator
 from copeai_backend import generate, models, conversation
 import asyncio
 
@@ -14,16 +15,24 @@ async def main():
         storage={} # If you need to store some data.
     )
 
-    # Generate a response. This is a non-streamed request, so it will return a ConversationResponse object. This is a blocking call.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    # Generate a response. This is a streamed request, so it will return a GeneratingResponseChunk object.
+    # Then, at the end of the generation, ConversationResponse will be returned.
+    # This is a non-blocking call.
+    response = generate.process_text_streaming(
         conversation=conv, # The conversation object.
         model=models.GPT_3, # The model to use. Add your own models to the MODELS dict in models.py.
         new_message="Hello, how are you?", # The message to send.
         # additional_args={} # Additional arguments to send to the API. These are different for each API.
-    )
+    ) # type: ignore
 
-    # Print the response.
-    print(response.response)
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+    # To retrieve the response, you can use the ConversationResponse object, from the last iteration.
+    _response = chunk.text
 
     # The assistant's message is automatically implemented into the conversation object.
     # Add a new user message.
@@ -33,10 +42,17 @@ async def main():
     )
 
     # Generate a response.
-    response: generate.ConversationResponse = await generate.simple_process_text(
+    response: generate.ConversationResponse = generate.process_text_streaming(
         conversation=conv,
         model=models.GPT_3,
         new_message="...",
     )
 
+    async for chunk in response:
+        if isinstance(chunk, conversation.GeneratingResponseChunk):
+            print(chunk.text, end="")
+        else:
+            print('\nConversation ended!')
+
+
 asyncio.run(main())
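Because the closing ConversationResponse is simply the last item the generator yields, a caller that wants both live output and the final object can wrap the loop in a helper. A sketch under the same assumptions as the examples above (`collect_stream` is a hypothetical name, not part of this commit):

    async def collect_stream(stream):
        final = None
        async for chunk in stream:
            if isinstance(chunk, conversation.GeneratingResponseChunk):
                print(chunk.text, end="")  # live partial output
            else:
                final = chunk              # the terminal ConversationResponse
        return final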