diff --git a/libs/ktem/ktem/index/file/index.py b/libs/ktem/ktem/index/file/index.py
index 7fcdf16..49d29ef 100644
--- a/libs/ktem/ktem/index/file/index.py
+++ b/libs/ktem/ktem/index/file/index.py
@@ -16,7 +16,8 @@ from .base import BaseFileIndexIndexing, BaseFileIndexRetriever
 
 
 class FileIndex(BaseIndex):
-    """Index for the uploaded files
+    """
+    File index to store and allow retrieval of files
 
     The file index stores files in a local folder and index them for retrieval.
     This file index provides the following infrastructure to support the indexing:
@@ -303,30 +304,28 @@ class FileIndex(BaseIndex):
                 "value": embedding_default,
                 "component": "dropdown",
                 "choices": embedding_choices,
+                "info": "The name of embedding model to use.",
             },
             "supported_file_types": {
                 "name": "Supported file types",
-                "value": (
-                    "image, .pdf, .txt, .csv, .xlsx, .doc, .docx, .pptx, .html, .zip"
-                ),
+                "value": ".pdf, .txt",
                 "component": "text",
+                "info": "The file types that can be indexed, separated by comma.",
             },
             "max_file_size": {
-                "name": "Max file size (MB) - set 0 to disable",
+                "name": "Max file size (MB)",
                 "value": 1000,
                 "component": "number",
+                "info": "The maximum size of file. Set 0 to disable.",
             },
             "max_number_of_files": {
-                "name": "Max number of files that can be indexed - set 0 to disable",
+                "name": "Max number of files that can be indexed",
                 "value": 0,
                 "component": "number",
-            },
-            "max_number_of_text_length": {
-                "name": (
-                    "Max amount of characters that can be indexed - set 0 to disable"
+                "info": (
+                    "The total number of files that can be indexed on the system. "
+                    "Set 0 to disable."
                 ),
-                "value": 0,
-                "component": "number",
             },
         }
 
diff --git a/libs/ktem/ktem/index/manager.py b/libs/ktem/ktem/index/manager.py
index af1c8d4..40b8a34 100644
--- a/libs/ktem/ktem/index/manager.py
+++ b/libs/ktem/ktem/index/manager.py
@@ -53,13 +53,13 @@ class IndexManager:
         index = index_cls(app=self._app, id=id, name=name, config=config)
         index.on_create()
 
-        with Session(engine) as session:
+        with Session(engine) as sess:
             index_entry = Index(
                 id=index.id, name=index.name, config=index.config, index_type=index_type
             )
-            session.add(index_entry)
-            session.commit()
-            session.refresh(index_entry)
+            sess.add(index_entry)
+            sess.commit()
+            sess.refresh(index_entry)
 
             index.id = index_entry.id
 
@@ -91,15 +91,13 @@ class IndexManager:
             bool: True if the index exists, False otherwise
         """
         if id:
-            with Session(engine) as session:
-                index = session.get(Index, id)
+            with Session(engine) as sess:
+                index = sess.get(Index, id)
                 return index is not None
 
         if name:
-            with Session(engine) as session:
-                index = session.exec(
-                    select(Index).where(Index.name == name)
-                ).one_or_none()
+            with Session(engine) as sess:
+                index = sess.exec(select(Index).where(Index.name == name)).one_or_none()
                 return index is not None
 
         return False
@@ -117,11 +115,15 @@ class IndexManager:
             if not self.exists(index["id"]):
                 self.build_index(**index)
 
-        with Session(engine) as session:
-            index_defs = session.exec(select(Index))
+        with Session(engine) as sess:
+            index_defs = sess.exec(select(Index))
             for index_def in index_defs:
-                self.start_index(**index_def.dict())
+                self.start_index(**index_def.model_dump())
 
     @property
     def indices(self):
         return self._indices
+
+    def info(self):
+        """Return a mapping from index id to index object."""
+        return {index.id: index for index in self._indices}
diff --git a/libs/ktem/ktem/index/ui.py b/libs/ktem/ktem/index/ui.py
new file mode 100644
index 0000000..9c7d4dc
--- /dev/null
+++ b/libs/ktem/ktem/index/ui.py
@@ -0,0 +1,215 @@
+import inspect
+
+import gradio as gr
+import pandas as pd
+import yaml
+from ktem.app import BasePage
+
+
+def format_description(cls):
+    """Render an index class's docstring and admin settings as Markdown.
+
+    Args:
+        cls: index class; its ``get_admin_settings()`` must return a mapping of
+            setting name to a dict whose optional "value" and "info" entries
+            fill the Default and Description columns.
+
+    Returns:
+        str: the dedented class docstring followed by a Markdown table with one
+            row per admin setting.
+    """
+    admin_settings = cls.get_admin_settings()
+    params_lines = ["| Name | Default | Description |", "| --- | --- | --- |"]
+    for key, value in admin_settings.items():
+        params_lines.append(
+            f"| {key} | {value.get('value', '')} | {value.get('info', '')} |"
+        )
+    # Dedent the docstring so its indented lines are not rendered as a Markdown
+    # code block, and show nothing (not the text "None") when it is missing.
+    doc = inspect.cleandoc(cls.__doc__) if cls.__doc__ else ""
+    return f"{doc}\n\n" + "\n".join(params_lines)
+
+
+class IndexManagement(BasePage):
+    """Resources page that lists the indices and shows the selected one."""
+
+    def __init__(self, app):
+        self._app = app
+        self.manager = app.index_manager
+        self.spec_desc_default = (
+            "# Spec description\n\nSelect an index to view the spec description."
+        )
+        self.on_building_ui()
+
+    def on_building_ui(self):
+        with gr.Tab(label="View"):
+            self.index_list = gr.DataFrame(
+                headers=["ID", "Name", "Index Type"],
+                interactive=False,
+            )
+
+            with gr.Column(visible=False) as self._selected_panel:
+                self.selected_index_id = gr.Number(value=-1, visible=False)
+                with gr.Row():
+                    with gr.Column():
+                        self.edit_name = gr.Textbox(
+                            label="Index name",
+                        )
+                        self.edit_spec = gr.Textbox(
+                            label="Specification",
+                            info="Specification of the Index in YAML format",
+                            lines=10,
+                        )
+
+                        gr.Markdown(
+                            "IMPORTANT: Changing or deleting the name or "
+                            "specification of the index will require restarting "
+                            "the system. Some settings will require rebuilding "
+                            "the index."
+                        )
+                        with gr.Row():
+                            self.btn_edit_save = gr.Button(
+                                "Save", min_width=10, variant="primary"
+                            )
+                            self.btn_delete = gr.Button(
+                                "Delete", min_width=10, variant="stop"
+                            )
+                            with gr.Row(visible=False) as self._delete_confirm:
+                                self.btn_delete_yes = gr.Button(
+                                    "Confirm Delete",
+                                    variant="stop",
+                                    min_width=10,
+                                )
+                                self.btn_delete_no = gr.Button("Cancel", min_width=10)
+                            self.btn_close = gr.Button("Close", min_width=10)
+
+                    with gr.Column():
+                        # Start from the placeholder text rather than a bare heading.
+                        self.edit_spec_desc = gr.Markdown(self.spec_desc_default)
+
+    def _on_app_created(self):
+        """Called when the app is created"""
+        self._app.app.load(
+            self.list_indices,
+            inputs=None,
+            outputs=[self.index_list],
+        )
+
+    def on_register_events(self):
+        self.index_list.select(
+            self.select_index,
+            inputs=self.index_list,
+            outputs=[self.selected_index_id],
+            show_progress="hidden",
+        )
+
+        self.selected_index_id.change(
+            self.on_change_selected_index,
+            inputs=[self.selected_index_id],
+            outputs=[
+                self._selected_panel,
+                # edit section
+                self.edit_spec,
+                self.edit_spec_desc,
+                self.edit_name,
+            ],
+            show_progress="hidden",
+        )
+        self.btn_delete.click(
+            lambda: (
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=True),
+            ),
+            inputs=None,
+            outputs=[
+                self.btn_edit_save,
+                self.btn_delete,
+                self.btn_close,
+                self._delete_confirm,
+            ],
+            show_progress="hidden",
+        )
+        self.btn_delete_no.click(
+            lambda: (
+                gr.update(visible=True),
+                gr.update(visible=True),
+                gr.update(visible=True),
+                gr.update(visible=False),
+            ),
+            inputs=None,
+            outputs=[
+                self.btn_edit_save,
+                self.btn_delete,
+                self.btn_close,
+                self._delete_confirm,
+            ],
+            show_progress="hidden",
+        )
+        self.btn_close.click(
+            lambda: -1,
+            outputs=[self.selected_index_id],
+        )
+
+    def list_indices(self):
+        """List the indices constructed by the user"""
+        items = []
+        for item in self.manager.indices:
+            record = {}
+            record["ID"] = item.id
+            record["Name"] = item.name
+            record["Index Type"] = item.__class__.__name__
+            items.append(record)
+
+        if items:
+            indices_list = pd.DataFrame.from_records(items)
+        else:
+            indices_list = pd.DataFrame.from_records(
+                [{"ID": "-", "Name": "-", "Index Type": "-"}]
+            )
+
+        return indices_list
+
+    def select_index(self, index_list, ev: gr.SelectData) -> int:
+        """Return the index id"""
+        if ev.value == "-" and ev.index[0] == 0:
+            gr.Info("No index is constructed. Please create one first!")
+            return -1
+
+        if not ev.selected:
+            return -1
+
+        return int(index_list["ID"][ev.index[0]])
+
+    def on_change_selected_index(self, selected_index_id: int):
+        """Update the detail panel for the newly selected index.
+
+        Args:
+            selected_index_id: id of the selected index, or -1 when nothing is
+                selected.
+
+        Returns:
+            tuple: values for the selected panel, the spec textbox, the spec
+                description and the name textbox, in that order.
+        """
+        index = None
+        if selected_index_id != -1:
+            # An id the manager does not know falls back to the "nothing
+            # selected" state instead of raising KeyError.
+            index = self.manager.info().get(selected_index_id)
+
+        if index is None:
+            return (
+                gr.update(visible=False),
+                gr.update(value=""),
+                gr.update(value=self.spec_desc_default),
+                gr.update(value=""),
+            )
+
+        return (
+            gr.update(visible=True),
+            yaml.dump(index.config),
+            format_description(index.__class__),
+            index.name,
+        )
diff --git a/libs/ktem/ktem/pages/resources/__init__.py b/libs/ktem/ktem/pages/resources/__init__.py
index 5dcb9f0..c423d3f 100644
--- a/libs/ktem/ktem/pages/resources/__init__.py
+++ b/libs/ktem/ktem/pages/resources/__init__.py
@@ -2,6 +2,7 @@ import gradio as gr
 from ktem.app import BasePage
 from ktem.db.models import User, engine
 from ktem.embeddings.ui import EmbeddingManagement
+from ktem.index.ui import IndexManagement
 from ktem.llms.ui import LLMManagement
 from sqlmodel import Session, select
 
@@ -21,9 +22,12 @@ class ResourcesTab(BasePage):
         with gr.Tab("LLMs") as self.llm_management_tab:
             self.llm_management = LLMManagement(self._app)
 
-        with gr.Tab("Embedding Models") as self.llm_management_tab:
+        with gr.Tab("Embedding Models") as self.emb_management_tab:
             self.emb_management = EmbeddingManagement(self._app)
 
+        with gr.Tab("Index Management") as self.index_management_tab:
+            self.index_management = IndexManagement(self._app)
+
     def on_subscribe_public_events(self):
         if self._app.f_user_management:
             self._app.subscribe_event(
diff --git a/libs/ktem/pyproject.toml b/libs/ktem/pyproject.toml
index 172498f..24096e0 100644
--- a/libs/ktem/pyproject.toml
+++ b/libs/ktem/pyproject.toml
@@ -17,11 +17,10 @@ dependencies = [
     "platformdirs",
     "pluggy",
     "python-decouple",
-    "python-pptx",
     "sqlalchemy",
     "sqlmodel",
     "tiktoken",
-    "gradio>=4.0.0,<=4.22.0",
+    "gradio>=4.26.0",
 ]
 readme = "README.md"
 license = { text = "MIT License" }