1. Introduce the concept of "collection_name" to the docstore and the vector store. Each collection can be viewed much like a table in a SQL database; it allows information within a data source to be organized better. 2. Move the `Index` and `Source` tables from the application scope into the index scope. Each new index created by the user gets its own set of these tables, so they depend on the index rather than on the app. 3. Make each index responsible for its own UI components in the app. 4. Construct the File UI page.
172 lines
4.7 KiB
Python
172 lines
4.7 KiB
Python
"""Common components, some kind of config"""
|
|
import logging
|
|
from functools import cache
|
|
from pathlib import Path
|
|
|
|
from theflow.settings import settings
|
|
from theflow.utils.modules import deserialize
|
|
|
|
from kotaemon.base import BaseComponent
|
|
from kotaemon.storages import BaseDocumentStore, BaseVectorStore
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Directory taken from the KH_FILESTORAGE_PATH setting. It is created at
# import time (including any missing parent directories) and left untouched
# when it already exists.
filestorage_path = Path(settings.KH_FILESTORAGE_PATH)
filestorage_path.mkdir(exist_ok=True, parents=True)
|
|
|
|
|
|
@cache
def get_docstore(collection_name: str = "default") -> BaseDocumentStore:
    """Build the document store bound to a collection

    The spec in `settings.KH_DOCSTORE` is deep-copied (so the shared settings
    object is never mutated), its "collection_name" entry is set to the
    requested name, and the result is deserialized with `safe=False`.

    The function is memoized with `functools.cache`. NOTE: the cache key is
    the call as written, so `get_docstore()` and `get_docstore("default")`
    occupy separate cache entries.

    Args:
        collection_name: name of the collection inside the document store

    Returns:
        BaseDocumentStore: the deserialized document store
    """
    import copy

    spec = copy.deepcopy(settings.KH_DOCSTORE)
    spec["collection_name"] = collection_name
    store = deserialize(spec, safe=False)
    return store
|
|
|
|
|
|
@cache
def get_vectorstore(collection_name: str = "default") -> BaseVectorStore:
    """Build the vector store bound to a collection

    Works on a deep copy of `settings.KH_VECTORSTORE` so the shared settings
    object stays untouched: the copy gets its "collection_name" entry set to
    the requested name and is then deserialized with `safe=False`.

    The function is memoized with `functools.cache`. NOTE: the cache key is
    the call as written, so `get_vectorstore()` and
    `get_vectorstore("default")` occupy separate cache entries.

    Args:
        collection_name: name of the collection inside the vector store

    Returns:
        BaseVectorStore: the deserialized vector store
    """
    import copy

    spec = copy.deepcopy(settings.KH_VECTORSTORE)
    spec["collection_name"] = collection_name
    store = deserialize(spec, safe=False)
    return store
|
|
|
|
|
|
class ModelPool:
    """Represent a pool of models

    Models enter the pool in two ways: from the `conf` mapping given at
    construction time (each entry is deserialized from its "def" spec), and
    through item assignment afterwards (`pool[name] = model`). The set of
    registered models (`self._models`) is the single source of truth for
    "is the pool empty" and for random selection, so models registered at
    runtime are selectable exactly like configured ones.
    """

    def __init__(self, category: str, conf: dict):
        """Create the pool and deserialize every configured model

        Args:
            category: label of the pool; used as "label" in `settings()`
            conf: mapping of model name to a dict holding the model spec
                under "def" and, optionally, "default" (truthy marks the
                model as a default), "accuracy" and "cost" (sort keys)
        """
        self._category = category
        self._conf = conf

        self._models: dict[str, BaseComponent] = {}
        self._default: list[str] = []

        for name, model in conf.items():
            self._models[name] = deserialize(model["def"], safe=False)
            if model.get("default", False):
                self._default.append(name)

        # Names in ascending accuracy; models without "accuracy" sort first.
        self._accuracy: list[str] = sorted(
            conf, key=lambda x: conf[x].get("accuracy", float("-inf"))
        )
        # Names in ascending cost; models without "cost" sort last.
        self._cost: list[str] = sorted(
            conf, key=lambda x: conf[x].get("cost", float("inf"))
        )

    def __getitem__(self, key: str) -> BaseComponent:
        """Get the model registered under `key` (KeyError if unknown)"""
        return self._models[key]

    def __setitem__(self, key: str, value: BaseComponent):
        """Register a model under `key`, or replace the existing one

        A name that is new to the pool has no accuracy/cost metadata, so it
        is ranked as the least accurate and the most expensive model. This
        keeps the rankings covering every registered model without changing
        which configured model is reported as best. Replacing an existing
        name keeps its current ranking.
        """
        newly_added = key not in self._models
        self._models[key] = value
        if newly_added:
            self._accuracy.insert(0, key)
            self._cost.append(key)

    def _ensure_not_empty(self) -> None:
        """Raise ValueError when no model is registered in the pool"""
        if not self._models:
            raise ValueError("No models in pool")

    def settings(self) -> dict:
        """Present model pools option for gradio

        Returns:
            dict: "label" (pool category), "choices" (all model names) and
                "value" (the default model name)

        Raises:
            ValueError: if the pool has no models
        """
        return {
            "label": self._category,
            "choices": list(self._models.keys()),
            "value": self.get_default_name(),
        }

    def options(self) -> dict:
        """Present a list of models

        Returns:
            dict: the pool's own name-to-model mapping (not a copy)
        """
        return self._models

    def get_random_name(self) -> str:
        """Get the name of random model

        Returns:
            str: random model name in the pool

        Raises:
            ValueError: if the pool has no models
        """
        import random

        self._ensure_not_empty()

        # Draw from the registered models rather than the original conf so
        # that models added through item assignment are included.
        return random.choice(list(self._models))

    def get_default_name(self) -> str:
        """Get the name of default model

        In case there is no default model, choose random model from pool. In
        case there are multiple default models, choose random from them.

        Returns:
            str: model name

        Raises:
            ValueError: if the pool has no models
        """
        self._ensure_not_empty()

        if self._default:
            import random

            return random.choice(self._default)

        return self.get_random_name()

    def get_random(self) -> BaseComponent:
        """Get random model

        Raises:
            ValueError: if the pool has no models
        """
        return self._models[self.get_random_name()]

    def get_default(self) -> BaseComponent:
        """Get default model

        In case there is no default model, choose random model from pool. In
        case there are multiple default models, choose random from them.

        Returns:
            BaseComponent: model

        Raises:
            ValueError: if the pool has no models
        """
        return self._models[self.get_default_name()]

    def get_highest_accuracy_name(self) -> str:
        """Get the name of model with highest accuracy

        Returns:
            str: model name

        Raises:
            ValueError: if the pool has no models
        """
        self._ensure_not_empty()
        return self._accuracy[-1]

    def get_highest_accuracy(self) -> BaseComponent:
        """Get model with highest accuracy

        Returns:
            BaseComponent: model

        Raises:
            ValueError: if the pool has no models
        """
        return self._models[self.get_highest_accuracy_name()]

    def get_lowest_cost_name(self) -> str:
        """Get the name of model with lowest cost

        Returns:
            str: model name

        Raises:
            ValueError: if the pool has no models
        """
        self._ensure_not_empty()
        return self._cost[0]

    def get_lowest_cost(self) -> BaseComponent:
        """Get model with lowest cost

        Returns:
            BaseComponent: model

        Raises:
            ValueError: if the pool has no models
        """
        return self._models[self.get_lowest_cost_name()]
|
|
|
|
|
|
# Pools whose models come from the application settings.
llms = ModelPool(category="LLMs", conf=settings.KH_LLMS)
embeddings = ModelPool(category="Embeddings", conf=settings.KH_EMBEDDINGS)

# Registries that start out empty in this module.
# NOTE(review): presumably filled in elsewhere (ModelPool supports item
# assignment) - confirm against the callers.
reasonings: dict = dict()
tools = ModelPool(category="Tools", conf={})
indices = ModelPool(category="Indices", conf={})
|