From 21d4ad64f3d8e56600ba6414093a3453b04202a2 Mon Sep 17 00:00:00 2001
From: showdown76py
Date: Tue, 16 Jan 2024 21:07:39 +0100
Subject: [PATCH] feat: wip, v2

---
 copeai_backend/__init__.py                    |   3 +
 copeai_backend/conversation.py                | 102 ++++++++++++++
 .../exception/LockedConversationException.py  |   7 ++
 copeai_backend/exception/__init__.py          |   1 +
 copeai_backend/generate.py                    |  91 ++++++++++++
 copeai_backend/models.py                      |  17 +++
 main.py                                       |  73 ++++++++-----
 views/GenerationState.py                      |  21 ++++
 8 files changed, 289 insertions(+), 26 deletions(-)
 create mode 100644 copeai_backend/__init__.py
 create mode 100644 copeai_backend/conversation.py
 create mode 100644 copeai_backend/exception/LockedConversationException.py
 create mode 100644 copeai_backend/exception/__init__.py
 create mode 100644 copeai_backend/generate.py
 create mode 100644 copeai_backend/models.py
 create mode 100644 views/GenerationState.py

diff --git a/copeai_backend/__init__.py b/copeai_backend/__init__.py
new file mode 100644
index 0000000..08db791
--- /dev/null
+++ b/copeai_backend/__init__.py
@@ -0,0 +1,3 @@
+from .conversation import Conversation, ConversationResponse, Role
+from .generate import process_text_streaming, simple_process_text
+from .models import Model, Service, GPT_3, GPT_4
diff --git a/copeai_backend/conversation.py b/copeai_backend/conversation.py
new file mode 100644
index 0000000..c058bef
--- /dev/null
+++ b/copeai_backend/conversation.py
@@ -0,0 +1,102 @@
+from dataclasses import dataclass
+import typing
+from openai import AsyncStream
+from openai.types.chat import ChatCompletionChunk, ChatCompletion
+import tiktoken
+from enum import Enum
+
+from copeai_backend.exception import ConversationLockedException
+
+from . import models
+
+encoding = tiktoken.get_encoding("cl100k_base")
+
+BASE_PROMPT = ""
+
+class Role(Enum):
+    SYSTEM = "system"
+    USER = "user"
+    ASSISTANT = "assistant"
+
+
+@dataclass
+class GeneratingResponseChunk:
+    """A chunk of a response from the model. You receive this while the **generation is still in progress**, for streamed requests."""
+
+    text: str
+    raw: ChatCompletionChunk
+
+
+class Conversation:
+    def __init__(self, add_base_prompt: bool = True, storage: dict | None = None) -> None:
+        self.messages = []
+        self.last_used_model: models.Model | None = None
+        self.locked = False
+        self.interrupted = False
+        self.store = storage if storage is not None else {}
+
+        if add_base_prompt and BASE_PROMPT:
+            self.messages.append({"role": Role.SYSTEM.value, "content": BASE_PROMPT})
+
+    def add_message(self, role: Role, message, username: str | None = None):
+        if not self.locked:
+            d = {"role": role.value, "content": message}
+            if username:
+                d["name"] = username
+            self.messages.append(d)
+        else:
+            raise ConversationLockedException()
+
+    def interrupt(self):
+        """Interrupts the generation currently running on this conversation."""
+        self.interrupted = True
+
+    def get_tokens(self):
+        return text_to_tokens(self.messages)
+
+    def last_role(self):
+        return Role(self.messages[-1]["role"])
+
+    def last_message(self):
+        return self.messages[-1]["content"]
+
+
+@dataclass
+class ConversationResponse:
+    """A full response from the generation. You receive this when the **generation is done**, or for non-streamed requests."""
+
+    conversation: Conversation
+    text: str | list[str]
+    raw_response: list[ChatCompletion] | list[ChatCompletionChunk]
+
+def text_to_tokens(string_or_messages: str | list[str | dict | list] | Conversation) -> int:
+    """Returns the number of tokens in a text string, message list, or Conversation."""
+    num_tokens = 0
+
+    messages = []
+    if isinstance(string_or_messages, str):
+        messages = [{"role": "user", "content": string_or_messages}]
+    elif isinstance(string_or_messages, Conversation):
+        messages = string_or_messages.messages
+    else:
+        messages = string_or_messages
+
+    for message in messages:
+        # every message follows {role/name}\n{content}\n
+        num_tokens += 4
+
+        if isinstance(message, dict):
+            for key, value in message.items():
+                num_tokens += len(encoding.encode(str(value)))
+                if key == "name":  # if there's a name, the role is omitted
+                    num_tokens += 1  # role is always required and always 1 token
+        elif isinstance(message, list):
+            for item in message:
+                if item["type"] == "text":
+                    num_tokens += len(encoding.encode(item["text"]))
+        elif isinstance(message, str):
+            num_tokens += len(encoding.encode(message))
+
+    num_tokens += 2  # every reply is primed with assistant
+
+    return num_tokens
\ No newline at end of file
diff --git a/copeai_backend/exception/LockedConversationException.py b/copeai_backend/exception/LockedConversationException.py
new file mode 100644
index 0000000..6054642
--- /dev/null
+++ b/copeai_backend/exception/LockedConversationException.py
@@ -0,0 +1,7 @@
+class ConversationLockedException(Exception):
+    """Raised when there is already an ongoing conversation."""
+
+    def __init__(self):
+        super().__init__(
+            "There is already an ongoing conversation. Please wait until it is finished."
+        )
diff --git a/copeai_backend/exception/__init__.py b/copeai_backend/exception/__init__.py
new file mode 100644
index 0000000..51a73e5
--- /dev/null
+++ b/copeai_backend/exception/__init__.py
@@ -0,0 +1 @@
+from .LockedConversationException import ConversationLockedException
diff --git a/copeai_backend/generate.py b/copeai_backend/generate.py
new file mode 100644
index 0000000..671cc80
--- /dev/null
+++ b/copeai_backend/generate.py
@@ -0,0 +1,91 @@
+import json
+import traceback
+from typing import Any, AsyncGenerator, Coroutine, Generator
+import requests
+import openai
+
+import asyncio
+from dotenv import load_dotenv
+import os
+from .conversation import (
+    Conversation,
+    Role,
+    ConversationResponse,
+    GeneratingResponseChunk,
+)
+from .models import Model
+from .exception import ConversationLockedException
+
+load_dotenv()
+
+oclient = openai.AsyncOpenAI(api_key=os.environ.get("OPENAI_KEY"))
+
+
+async def simple_process_text(
+    conversation: Conversation,
+    model: Model,
+    new_message: str,
+    additional_args: dict = {},
+) -> ConversationResponse:
+    conversation.add_message(Role.USER, new_message)
+    conversation.last_used_model = model
+    r = await oclient.chat.completions.create(
+        model=model.id, messages=conversation.messages, **additional_args
+    )
+    conversation.add_message(Role.ASSISTANT, r.choices[0].message.content)
+    return ConversationResponse(conversation, r.choices[0].message.content, r)
+
+
+async def process_text_streaming(
+    conversation: Conversation,
+    model: Model,
+    new_message: str,
+    additional_args: dict = {},
+) -> AsyncGenerator[GeneratingResponseChunk | ConversationResponse, None]:
+    if conversation.locked:
+        raise ConversationLockedException()
+
+    try:
+        text_parts = []
+        resp_parts = []
+
+        conversation.add_message(
+            Role.USER,
+            new_message,
+            (additional_args["userid"] if "userid" in additional_args else "unknown"),
+        )
+        conversation.last_used_model = model
+        conversation.locked = True
+        if model.service == "openai":
+            response = await oclient.chat.completions.create(
+                model=model.id,
+                messages=conversation.messages,
+                temperature=0.9,
+                top_p=1.0,
+                presence_penalty=0.6,
+                frequency_penalty=0.0,
+                max_tokens=4096,
+                stream=True,
+            )
+
+            async for chunk in response:
+                partition = chunk.choices[0].delta
+                if (
+                    "content"
+                    in json.loads(chunk.model_dump_json())["choices"][0]["delta"].keys()
+                ):
+                    if partition.content is not None:
+                        text_parts.append(partition.content)
+                        resp_parts.append(chunk)
+                        yield GeneratingResponseChunk(partition.content, chunk)
+
+                if conversation.interrupted:
+                    break  # stop streaming as soon as the conversation is interrupted
+
+        conversation.locked = False
+
+        conversation.add_message(Role.ASSISTANT, ''.join(text_parts))
+        yield ConversationResponse(conversation, text_parts, resp_parts)
+    except Exception as e:
+        conversation.locked = False
+        raise e
diff --git a/copeai_backend/models.py b/copeai_backend/models.py
new file mode 100644
index 0000000..de9902c
--- /dev/null
+++ b/copeai_backend/models.py
@@ -0,0 +1,17 @@
+from dataclasses import dataclass
+from typing import Literal
+
+
+Service = Literal["openai", "bard"]
+
+
+@dataclass
+class Model:
+    id: str
+    usage_name: str
+    service: Service
+
+
+GPT_3 = Model(id="gpt-3.5-turbo-16k-0613", usage_name="GPT-3", service="openai")
+
+GPT_4 = Model(id="gpt-4-16k-0613", usage_name="GPT-4", service="openai")
diff --git a/main.py b/main.py
index 748d486..484d4cd 100644
--- a/main.py
+++ b/main.py
@@ -8,6 +8,9 @@
 import openai
 import sqlite3
 import tiktoken
 from dotenv import load_dotenv
+from typing import Dict
+import copeai_backend
+import views.GenerationState
 
 load_dotenv()
@@ -19,6 +22,8 @@
 intents.members = True
 intents.presences = True
 intents.dm_messages = True
+cached_conversations: Dict[discord.User, copeai_backend.conversation.Conversation] = {}
+
 class App(discord.Client):
     def __init__(self):
         super().__init__(intents=intents)
@@ -55,11 +60,6 @@ async def on_message(message: discord.Message):
     if not isinstance(message.channel, discord.DMChannel): return
     if message.author.id == app.user.id: return
     try:
-        c = db.cursor()
-        c.execute('SELECT * FROM message_history WHERE user_id = ? ORDER BY timestamp DESC', (message.author.id,))
-        msgs = c.fetchall()
-        message_token_usage = num_tokens_from_string(message.content)
-        max_token = int(os.environ['MAX_TOKEN_PER_REQUEST'])
 
         with open('base-prompt.txt', 'r', encoding='utf-8') as f:
             bprompt = f.read()
@@ -84,32 +84,53 @@ async def on_message(message: discord.Message):
         }
         for arg in arguments.keys():
             bprompt = bprompt.replace(f'|{arg}|', arguments[arg])
-
-        previous_tokens = 200+len(bprompt)+message_token_usage
-        # (message_id, user_id, content, token, role, timestamp)
-        # order by timestamp (most recent to least recent)
-        usable_messages = []
-        for msg in msgs:
-            d = previous_tokens + msg[3]
-            if d >= max_token:
-                break
-            previous_tokens += msg[3]
-            usable_messages.append(msg)
-
-        usable_messages.reverse()
+        if message.author not in cached_conversations:
+            cached_conversations[message.author] = copeai_backend.conversation.Conversation()
+            c = db.cursor()
+            c.execute('SELECT * FROM message_history WHERE user_id = ? ORDER BY timestamp DESC', (message.author.id,))
+            msgs = c.fetchall()
+            message_token_usage = num_tokens_from_string(message.content)
+            max_token = int(os.environ['MAX_TOKEN_PER_REQUEST'])
+
+            previous_tokens = 200+len(bprompt)+message_token_usage
+            # (message_id, user_id, content, token, role, timestamp)
+            # order by timestamp (most recent to least recent)
+            usable_messages = []
+            for msg in msgs:
+                d = previous_tokens + msg[3]
+                if d >= max_token:
+                    break
+                previous_tokens += msg[3]
+                usable_messages.append(msg)
+
+            usable_messages.reverse()
+
+
+            messages = [{"role": "system", "content": bprompt}]
+            for v in usable_messages: messages.append({"role": v[4], "content": v[2]})
+        else:
+            total_tokens = copeai_backend.conversation.text_to_tokens(cached_conversations[message.author])
+            while total_tokens > int(os.environ['MAX_TOKEN_PER_REQUEST']) - 400:
+                cached_conversations[message.author].messages.pop(0)
+                total_tokens = copeai_backend.conversation.text_to_tokens(cached_conversations[message.author])
 
-        messages = [{"role": "system", "content": bprompt}]
-        for v in usable_messages: messages.append({"role": v[4], "content": v[2]})
-        messages.append({"role": "user", "content": message.content})
+        cached_conversations[message.author].add_message(
+            role=copeai_backend.conversation.Role.USER,
+            message=message.content
+        )
+
         await message.channel.typing()
         typing.append(message.channel)
 
-        req = await openai.ChatCompletion.acreate(
-            model="gpt-3.5-turbo",
-            temperature=0.5,
-            max_tokens=max_token-(previous_tokens-200),
-            messages=messages
+        req = copeai_backend.generate.process_text_streaming(
+            conversation=cached_conversations[message.author],
+            model=copeai_backend.models.GPT_3,
+            new_message=message.content,
+            additional_args={
+                "max_tokens": int(os.environ['MAX_TOKEN_PER_REQUEST']),
+            }
         )
+
         typing.remove(message.channel)
         response = req['choices'][0]['message']['content']
         prompt_used_tokens = req['usage']['prompt_tokens']
diff --git a/views/GenerationState.py b/views/GenerationState.py
new file mode 100644
index 0000000..66f7f20
--- /dev/null
+++ b/views/GenerationState.py
@@ -0,0 +1,21 @@
+from enum import Enum
+
+import discord
+
+class GenerationState(Enum):
+    GENERATING = "generating"
+    FINISHED = "finished"
+
+class GenerationStateButton(discord.ui.Button):
+    def __init__(self, label: str, style: discord.ButtonStyle, emoji: str | discord.Emoji | discord.PartialEmoji | None = None, disabled: bool = False):
+        super().__init__(label=label, style=style, emoji=emoji, disabled=disabled)
+
+class GenerationStateView(discord.ui.View):
+    def __init__(self, state: GenerationState):
+        super().__init__()
+        self.state = state
+
+        if state == GenerationState.GENERATING:
+            self.add_item(GenerationStateButton(label="Generating...", style=discord.ButtonStyle.grey, emoji="✨", disabled=True))
+        elif state == GenerationState.FINISHED:
+            pass
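
Note on consuming the new API: process_text_streaming is an async generator, so the dict-style access that remains in main.py's unchanged context lines (req['choices'][0]['message']['content'], req['usage']['prompt_tokens']) will not work against it yet; the patch subject marks this as WIP. The following is a minimal, hypothetical sketch (not part of the patch) of how the generator is presumably meant to be iterated, assuming the copeai_backend package above is importable and an OPENAI_KEY environment variable is set. The demo() coroutine and the "Hello there!" prompt are illustrative only.

# Hypothetical usage sketch, not part of the patch.
import asyncio

import copeai_backend
from copeai_backend.conversation import GeneratingResponseChunk


async def demo() -> None:
    conversation = copeai_backend.Conversation()
    async for part in copeai_backend.generate.process_text_streaming(
        conversation=conversation,
        model=copeai_backend.models.GPT_3,
        new_message="Hello there!",
    ):
        if isinstance(part, GeneratingResponseChunk):
            # streamed fragment of the answer
            print(part.text, end="", flush=True)
        elif isinstance(part, copeai_backend.ConversationResponse):
            # final response; in the streaming path .text is the list of fragments
            full_text = "".join(part.text)
            print("\n--- done:", len(full_text), "characters")


asyncio.run(demo())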