[AUR-338, AUR-406, AUR-407] Export pipeline to config for PromptUI. Construct PromptUI dynamically based on config. (#16)

From pipeline > config > UI. Provide example project for promptui

- Pipeline to config: `kotaemon.contribs.promptui.config.export_pipeline_to_config`. The config follows the schema specified in this document: https://cinnamon-ai.atlassian.net/wiki/spaces/ATM/pages/2748711193/Technical+Detail. Note: this implementation excludes the logs, which will be handled in AUR-408.
- Config to UI: `kotaemon.contribs.promptui.build_from_yaml`
- Example project is located at `examples/promptui/`
This commit is contained in:
Nguyen Trung Duc (john)
2023-09-21 14:27:23 +07:00
committed by GitHub
parent c329c4c03f
commit c6dd01e820
18 changed files with 503 additions and 46 deletions

View File

@@ -1,9 +1,11 @@
import json
from pathlib import Path
from typing import cast
import pytest
from openai.api_resources.embedding import Embedding
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.documents.base import Document
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
@@ -21,6 +23,7 @@ def mock_openai_embedding(monkeypatch):
def test_indexing(mock_openai_embedding, tmp_path):
db = ChromaVectorStore(path=str(tmp_path))
doc_store = InMemoryDocumentStore()
embedding = AzureOpenAIEmbeddings(
model="text-embedding-ada-002",
deployment="embedding-deployment",
@@ -29,15 +32,19 @@ def test_indexing(mock_openai_embedding, tmp_path):
)
pipeline = IndexVectorStoreFromDocumentPipeline(
vector_store=db, embedding=embedding
vector_store=db, embedding=embedding, doc_store=doc_store
)
pipeline.doc_store = cast(InMemoryDocumentStore, pipeline.doc_store)
assert pipeline.vector_store._collection.count() == 0, "Expected empty collection"
assert len(pipeline.doc_store._store) == 0, "Expected empty doc store"
pipeline(text=Document(text="Hello world"))
assert pipeline.vector_store._collection.count() == 1, "Index 1 item"
assert len(pipeline.doc_store._store) == 1, "Expected 1 document"
def test_retrieving(mock_openai_embedding, tmp_path):
db = ChromaVectorStore(path=str(tmp_path))
doc_store = InMemoryDocumentStore()
embedding = AzureOpenAIEmbeddings(
model="text-embedding-ada-002",
deployment="embedding-deployment",
@@ -46,14 +53,14 @@ def test_retrieving(mock_openai_embedding, tmp_path):
)
index_pipeline = IndexVectorStoreFromDocumentPipeline(
vector_store=db, embedding=embedding
vector_store=db, embedding=embedding, doc_store=doc_store
)
retrieval_pipeline = RetrieveDocumentFromVectorStorePipeline(
vector_store=db, embedding=embedding
vector_store=db, doc_store=doc_store, embedding=embedding
)
index_pipeline(text=Document(text="Hello world"))
output = retrieval_pipeline(text=["Hello world", "Hello world"])
assert len(output) == 2, "Expected 2 results"
assert output[0] == output[1], "Expected identical results"
assert len(output) == 2, "Expect 2 results"
assert output[0] == output[1], "Expect identical results"

86
tests/test_promptui.py Normal file
View File

@@ -0,0 +1,86 @@
import pytest
from kotaemon.contribs.promptui.config import export_pipeline_to_config
from kotaemon.contribs.promptui.ui import build_from_dict
@pytest.fixture()
def simple_pipeline_cls(tmp_path):
    """Create a pipeline class that can be used.

    Returns the class itself (not an instance) so each test can
    instantiate it fresh; ``tmp_path`` is baked in as the vector-store
    path, keeping every test isolated on disk.
    """
    # Imports are local so merely collecting the test module does not
    # require the (heavy) project dependencies.
    from typing import List

    from theflow import Node

    from kotaemon.base import BaseComponent
    from kotaemon.embeddings import AzureOpenAIEmbeddings
    from kotaemon.llms.completions.openai import AzureOpenAI
    from kotaemon.pipelines.retrieving import (
        RetrieveDocumentFromVectorStorePipeline,
    )
    from kotaemon.vectorstores import ChromaVectorStore

    class Pipeline(BaseComponent):
        # Path of the on-disk Chroma collection; surfaced to promptui as a
        # configurable param (see TestPromptConfig).
        vectorstore_path: str = str(tmp_path)

        # LLM node with dummy Azure credentials -- these tests only export
        # config / build UI, they never make a network call.
        llm: Node[AzureOpenAI] = Node(
            default=AzureOpenAI,
            default_kwargs={
                "openai_api_base": "https://test.openai.azure.com/",
                "openai_api_key": "some-key",
                "openai_api_version": "2023-03-15-preview",
                "deployment_name": "gpt35turbo",
                "temperature": 0,
                "request_timeout": 60,
            },
        )

        # Declared as dependent on ``vectorstore_path`` so theflow rebuilds
        # the node when that param changes.
        @Node.decorate(depends_on=["vectorstore_path"])
        def retrieving_pipeline(self):
            vector_store = ChromaVectorStore(self.vectorstore_path)
            embedding = AzureOpenAIEmbeddings(
                model="text-embedding-ada-002",
                deployment="embedding-deployment",
                openai_api_base="https://test.openai.azure.com/",
                openai_api_key="some-key",
            )
            # NOTE(review): no ``doc_store`` is passed here, unlike the
            # retrieval pipeline constructed in the indexing tests of this
            # same commit -- confirm the argument is optional.
            return RetrieveDocumentFromVectorStorePipeline(
                vector_store=vector_store, embedding=embedding
            )

        def run_raw(self, text: str) -> str:
            # Retrieve matching chunks, then feed them to the LLM as a
            # single newline-joined prompt; return the first completion.
            matched_texts: List[str] = self.retrieving_pipeline(text)
            return self.llm("\n".join(matched_texts)).text[0]

    return Pipeline
# NOTE(review): this rebinds the *fixture function* (not a pipeline class) to
# a module-level name, and nothing visible below references it -- looks like
# leftover dead code; confirm before removing.
Pipeline = simple_pipeline_cls
class TestPromptConfig:
    """Checks for the pipeline -> promptui config export."""

    def test_export_prompt_config(self, simple_pipeline_cls):
        """Test if the prompt config is exported correctly."""
        pipeline = simple_pipeline_cls()
        exported = export_pipeline_to_config(pipeline)
        # The export maps pipeline name -> config; inspect the sole entry.
        config = next(iter(exported.values()))

        assert "inputs" in config, "inputs should be in config"
        assert "text" in config["inputs"], "inputs should have config"
        assert "params" in config, "params should be in config"

        # Every user-tunable knob of the pipeline must be surfaced,
        # including the nested llm node's kwargs (dotted names).
        expected_params = (
            "vectorstore_path",
            "llm.deployment_name",
            "llm.openai_api_base",
            "llm.openai_api_key",
            "llm.openai_api_version",
            "llm.request_timeout",
            "llm.temperature",
        )
        for param in expected_params:
            assert param in config["params"]
class TestPromptUI:
    """Smoke tests for dynamic UI construction from an exported config."""

    def test_uigeneration(self, simple_pipeline_cls):
        """Test if the gradio UI is exposed without any problem."""
        # Round-trip: pipeline -> config dict -> UI. Passing is simply
        # "does not raise"; no assertions on the built UI itself.
        exported_config = export_pipeline_to_config(simple_pipeline_cls())
        build_from_dict(exported_config)