From 21d4ad64f3d8e56600ba6414093a3453b04202a2 Mon Sep 17 00:00:00 2001
From: showdown76py
Date: Tue, 16 Jan 2024 21:07:39 +0100
Subject: [PATCH] feat: wip, v2

---
 copeai_backend/__init__.py                    |   3 +
 copeai_backend/conversation.py                | 102 ++++++++++++++
 .../exception/LockedConversationException.py  |   7 ++
 copeai_backend/exception/__init__.py          |   1 +
 copeai_backend/generate.py                    |  91 ++++++++++++
 copeai_backend/models.py                      |  17 +++
 main.py                                       |  73 ++++++++-----
 views/GenerationState.py                      |  21 ++++
 8 files changed, 289 insertions(+), 26 deletions(-)
 create mode 100644 copeai_backend/__init__.py
 create mode 100644 copeai_backend/conversation.py
 create mode 100644 copeai_backend/exception/LockedConversationException.py
 create mode 100644 copeai_backend/exception/__init__.py
 create mode 100644 copeai_backend/generate.py
 create mode 100644 copeai_backend/models.py
 create mode 100644 views/GenerationState.py

diff --git a/copeai_backend/__init__.py b/copeai_backend/__init__.py
new file mode 100644
index 0000000..08db791
--- /dev/null
+++ b/copeai_backend/__init__.py
@@ -0,0 +1,3 @@
+from .conversation import Conversation, ConversationResponse, Role
+from .generate import process_text_streaming, simple_process_text
+from .models import Model, Service, GPT_3, GPT_4
diff --git a/copeai_backend/conversation.py b/copeai_backend/conversation.py
new file mode 100644
index 0000000..c058bef
--- /dev/null
+++ b/copeai_backend/conversation.py
@@ -0,0 +1,102 @@
+from dataclasses import dataclass
+import typing
+from openai import AsyncStream
+from openai.types.chat import ChatCompletionChunk, ChatCompletion
+import tiktoken
+from enum import Enum
+
+from copeai_backend.exception import ConversationLockedException
+
+from . import models
+
+encoding = tiktoken.get_encoding("cl100k_base")
+
+BASE_PROMPT = ""
+
+class Role(Enum):
+    SYSTEM = "system"
+    USER = "user"
+    ASSISTANT = "assistant"
+
+
+@dataclass
+class GeneratingResponseChunk:
+    """A chunk of a response from the model. You receive this while the **generation is still in progress**, for streamed requests."""
+
+    text: str
+    raw: ChatCompletionChunk
+
+
+class Conversation:
+    def __init__(self, add_base_prompt: bool = True, storage: dict | None = None) -> None:
+        self.messages = []
+        self.last_used_model: models.Model | None = None
+        self.locked = False
+        self.interrupted = False
+        self.store = storage if storage is not None else {}
+
+        if add_base_prompt and BASE_PROMPT:
+            self.messages.append({"role": Role.SYSTEM.value, "content": BASE_PROMPT})
+
+    def add_message(self, role: Role, message, username: str | None = None):
+        if not self.locked:
+            d = {"role": role.value, "content": message}
+            if username:
+                d["name"] = username
+            self.messages.append(d)
+        else:
+            raise ConversationLockedException()
+
+    def interrupt(self):
+        """Interrupts the generation currently running on this conversation."""
+        self.interrupted = True
+
+    def get_tokens(self):
+        return text_to_tokens(self.messages)
+
+    def last_role(self):
+        return Role(self.messages[-1]["role"])
+
+    def last_message(self):
+        return self.messages[-1]["content"]
+
+
+@dataclass
+class ConversationResponse:
+    """A full response from the generation. You receive this when the **generation is done**, or for non-streamed requests."""
+
+    conversation: Conversation
+    text: str | list[str]
+    raw_response: list[ChatCompletion] | list[ChatCompletionChunk]
+
+def text_to_tokens(string_or_messages: str | list[str | dict | list] | Conversation) -> int:
+    """Returns the number of tokens in a text string, message list, or Conversation."""
+    num_tokens = 0
+
+    messages = []
+    if isinstance(string_or_messages, str):
+        messages = [{"role": "user", "content": string_or_messages}]
+    elif isinstance(string_or_messages, Conversation):
+        messages = string_or_messages.messages
+    else:
+        messages = string_or_messages
+
+    for message in messages:
+        # every message follows {role/name}\n{content}\n
+        num_tokens += 4
+
+        if isinstance(message, dict):
+            for key, value in message.items():
+                num_tokens += len(encoding.encode(str(value)))
+                if key == "name":  # if there's a name, the role is omitted
+                    num_tokens += 1  # role is always required and always 1 token
+        elif isinstance(message, list):
+            for item in message:
+                if item["type"] == "text":
+                    num_tokens += len(encoding.encode(item["text"]))
+        elif isinstance(message, str):
+            num_tokens += len(encoding.encode(message))
+
+    num_tokens += 2  # every reply is primed with assistant
+
+    return num_tokens
\ No newline at end of file
diff --git a/copeai_backend/exception/LockedConversationException.py b/copeai_backend/exception/LockedConversationException.py
new file mode 100644
index 0000000..6054642
--- /dev/null
+++ b/copeai_backend/exception/LockedConversationException.py
@@ -0,0 +1,7 @@
+class ConversationLockedException(Exception):
+    """Raised when there is already an ongoing conversation."""
+
+    def __init__(self):
+        super().__init__(
+            "There is already an ongoing conversation. Please wait until it is finished."
+        )
diff --git a/copeai_backend/exception/__init__.py b/copeai_backend/exception/__init__.py
new file mode 100644
index 0000000..51a73e5
--- /dev/null
+++ b/copeai_backend/exception/__init__.py
@@ -0,0 +1 @@
+from .LockedConversationException import ConversationLockedException
diff --git a/copeai_backend/generate.py b/copeai_backend/generate.py
new file mode 100644
index 0000000..671cc80
--- /dev/null
+++ b/copeai_backend/generate.py
@@ -0,0 +1,91 @@
+import json
+import traceback
+from typing import Any, AsyncGenerator, Coroutine, Generator
+import requests
+import openai
+
+import asyncio
+from dotenv import load_dotenv
+import os
+from .conversation import (
+    Conversation,
+    Role,
+    ConversationResponse,
+    GeneratingResponseChunk,
+)
+from .models import Model
+from .exception import ConversationLockedException
+
+load_dotenv()
+
+oclient = openai.AsyncOpenAI(api_key=os.environ.get("OPENAI_KEY"))
+
+
+async def simple_process_text(
+    conversation: Conversation,
+    model: Model,
+    new_message: str,
+    additional_args: dict = {},
+) -> ConversationResponse:
+    conversation.add_message(Role.USER, new_message)
+    conversation.last_used_model = model
+    r = await oclient.chat.completions.create(
+        model=model.id, messages=conversation.messages, **additional_args
+    )
+    conversation.add_message(Role.ASSISTANT, r.choices[0].message.content)
+    return ConversationResponse(conversation, r.choices[0].message.content, r)
+
+
+async def process_text_streaming(
+    conversation: Conversation,
+    model: Model,
+    new_message: str,
+    additional_args: dict = {},
+) -> AsyncGenerator[GeneratingResponseChunk | ConversationResponse, None]:
+    if conversation.locked:
+        raise ConversationLockedException()
+
+    try:
+        text_parts = []
+        resp_parts = []
+
+        conversation.add_message(
+            Role.USER,
+            new_message,
+            (additional_args["userid"] if "userid" in additional_args else "unknown"),
+        )
+        conversation.last_used_model = model
+        conversation.locked = True
+        if model.service == "openai":
+            response = await oclient.chat.completions.create(
+                model=model.id,
+                messages=conversation.messages,
+                temperature=0.9,
+                top_p=1.0,
+                presence_penalty=0.6,
+                frequency_penalty=0.0,
+                max_tokens=4096,
+                stream=True,
+            )
+
+            async for chunk in response:
+                partition = chunk.choices[0].delta
+                if (
+                    "content"
+                    in json.loads(chunk.model_dump_json())["choices"][0]["delta"].keys()
+                ):
+                    if partition.content is not None:
+                        text_parts.append(partition.content)
+                        resp_parts.append(chunk)
+                        yield GeneratingResponseChunk(partition.content, chunk)
+
+                if conversation.interrupted:
+                    break  # stop streaming as soon as the conversation is interrupted
+
+        conversation.locked = False
+
+        conversation.add_message(Role.ASSISTANT, ''.join(text_parts))
+        yield ConversationResponse(conversation, text_parts, resp_parts)
+    except Exception as e:
+        conversation.locked = False
+        raise e
diff --git a/copeai_backend/models.py b/copeai_backend/models.py
new file mode 100644
index 0000000..de9902c
--- /dev/null
+++ b/copeai_backend/models.py
@@ -0,0 +1,17 @@
+from dataclasses import dataclass
+from typing import Literal
+
+
+Service = Literal["openai", "bard"]
+
+
+@dataclass
+class Model:
+    id: str
+    usage_name: str
+    service: Service
+
+
+GPT_3 = Model(id="gpt-3.5-turbo-16k-0613", usage_name="GPT-3", service="openai")
+
+GPT_4 = Model(id="gpt-4-16k-0613", usage_name="GPT-4", service="openai")
diff --git a/main.py b/main.py
index 748d486..484d4cd 100644
--- a/main.py
+++ b/main.py
@@ -8,6 +8,9 @@
 import openai
 import sqlite3
 import tiktoken
 from dotenv import load_dotenv
+from typing import Dict
+import copeai_backend
+import views.GenerationState
 
 load_dotenv()
@@ -19,6 +22,8 @@
 intents.members = True
 intents.presences = True
 intents.dm_messages = True
+cached_conversations: Dict[discord.User, copeai_backend.conversation.Conversation] = {}
+
 class App(discord.Client):
     def __init__(self):
         super().__init__(intents=intents)
@@ -55,11 +60,6 @@ async def on_message(message: discord.Message):
     if not isinstance(message.channel, discord.DMChannel): return
     if message.author.id == app.user.id: return
     try:
-        c = db.cursor()
-        c.execute('SELECT * FROM message_history WHERE user_id = ? ORDER BY timestamp DESC', (message.author.id,))
-        msgs = c.fetchall()
-        message_token_usage = num_tokens_from_string(message.content)
-        max_token = int(os.environ['MAX_TOKEN_PER_REQUEST'])
 
         with open('base-prompt.txt', 'r', encoding='utf-8') as f:
             bprompt = f.read()
@@ -84,32 +84,53 @@ async def on_message(message: discord.Message):
         }
         for arg in arguments.keys():
             bprompt = bprompt.replace(f'|{arg}|', arguments[arg])
-
-        previous_tokens = 200+len(bprompt)+message_token_usage
-        # (message_id, user_id, content, token, role, timestamp)
-        # order by timestamp (most recent to least recent)
-        usable_messages = []
-        for msg in msgs:
-            d = previous_tokens + msg[3]
-            if d >= max_token:
-                break
-            previous_tokens += msg[3]
-            usable_messages.append(msg)
-
-        usable_messages.reverse()
+        if message.author not in cached_conversations:
+            cached_conversations[message.author] = copeai_backend.conversation.Conversation()
+            c = db.cursor()
+            c.execute('SELECT * FROM message_history WHERE user_id = ? ORDER BY timestamp DESC', (message.author.id,))
+            msgs = c.fetchall()
+            message_token_usage = num_tokens_from_string(message.content)
+            max_token = int(os.environ['MAX_TOKEN_PER_REQUEST'])
+
+            previous_tokens = 200+len(bprompt)+message_token_usage
+            # (message_id, user_id, content, token, role, timestamp)
+            # order by timestamp (most recent to least recent)
+            usable_messages = []
+            for msg in msgs:
+                d = previous_tokens + msg[3]
+                if d >= max_token:
+                    break
+                previous_tokens += msg[3]
+                usable_messages.append(msg)
+
+            usable_messages.reverse()
+
+
+            messages = [{"role": "system", "content": bprompt}]
+            for v in usable_messages: messages.append({"role": v[4], "content": v[2]})
+        else:
+            total_tokens = copeai_backend.conversation.text_to_tokens(cached_conversations[message.author])
+            while total_tokens > int(os.environ['MAX_TOKEN_PER_REQUEST']) - 400:
+                cached_conversations[message.author].messages.pop(0)
+                total_tokens = copeai_backend.conversation.text_to_tokens(cached_conversations[message.author])
 
-        messages = [{"role": "system", "content": bprompt}]
-        for v in usable_messages: messages.append({"role": v[4], "content": v[2]})
-        messages.append({"role": "user", "content": message.content})
+        cached_conversations[message.author].add_message(
+            role=copeai_backend.conversation.Role.USER,
+            message=message.content
+        )
+
         await message.channel.typing()
         typing.append(message.channel)
 
-        req = await openai.ChatCompletion.acreate(
-            model="gpt-3.5-turbo",
-            temperature=0.5,
-            max_tokens=max_token-(previous_tokens-200),
-            messages=messages
+        req = copeai_backend.generate.process_text_streaming(
+            conversation=cached_conversations[message.author],
+            model=copeai_backend.models.GPT_3,
+            new_message=message.content,
+            additional_args={
+                "max_tokens": int(os.environ['MAX_TOKEN_PER_REQUEST']),
+            }
         )
+
         typing.remove(message.channel)
         response = req['choices'][0]['message']['content']
         prompt_used_tokens = req['usage']['prompt_tokens']
diff --git a/views/GenerationState.py b/views/GenerationState.py
new file mode 100644
index 0000000..66f7f20
--- /dev/null
+++ b/views/GenerationState.py
@@ -0,0 +1,21 @@
+from enum import Enum
+
+import discord
+
+class GenerationState(Enum):
+    GENERATING = "generating"
+    FINISHED = "finished"
+
+class GenerationStateButton(discord.ui.Button):
+    def __init__(self, label: str, style: discord.ButtonStyle, emoji: str | discord.Emoji | discord.PartialEmoji | None = None, disabled: bool = False):
+        super().__init__(label=label, style=style, emoji=emoji, disabled=disabled)
+
+class GenerationStateView(discord.ui.View):
+    def __init__(self, state: GenerationState):
+        super().__init__()
+        self.state = state
+
+        if state == GenerationState.GENERATING:
+            self.add_item(GenerationStateButton(label="Generating...", style=discord.ButtonStyle.grey, emoji="✨", disabled=True))
+        elif state == GenerationState.FINISHED:
+            pass
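
Note on consuming the new API: process_text_streaming is an async generator, so the dict-style access that remains in main.py's unchanged context lines (req['choices'][0]['message']['content'], req['usage']['prompt_tokens']) will not work against it yet; the patch subject marks this as WIP. The following is a minimal, hypothetical sketch (not part of the patch) of how the generator is presumably meant to be iterated, assuming the copeai_backend package above is importable and an OPENAI_KEY environment variable is set. The demo() coroutine and the "Hello there!" prompt are illustrative only.

# Hypothetical usage sketch, not part of the patch.
import asyncio

import copeai_backend
from copeai_backend.conversation import GeneratingResponseChunk


async def demo() -> None:
    conversation = copeai_backend.Conversation()
    async for part in copeai_backend.generate.process_text_streaming(
        conversation=conversation,
        model=copeai_backend.models.GPT_3,
        new_message="Hello there!",
    ):
        if isinstance(part, GeneratingResponseChunk):
            # streamed fragment of the answer
            print(part.text, end="", flush=True)
        elif isinstance(part, copeai_backend.ConversationResponse):
            # final response; in the streaming path .text is the list of fragments
            full_text = "".join(part.text)
            print("\n--- done:", len(full_text), "characters")


asyncio.run(demo())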