diff --git a/.gitsecret/keys/pubring.kbx b/.gitsecret/keys/pubring.kbx
index 95dcf6f..c7ae599 100644
Binary files a/.gitsecret/keys/pubring.kbx and b/.gitsecret/keys/pubring.kbx differ
diff --git a/.gitsecret/paths/mapping.cfg b/.gitsecret/paths/mapping.cfg
index 576d1d7..bc9d978 100644
--- a/.gitsecret/paths/mapping.cfg
+++ b/.gitsecret/paths/mapping.cfg
@@ -1 +1 @@
-credentials.txt:1e17fa46dd8353b5ded588b32983ac7d800e70fd16bc5831663b9aaefc409011
+credentials.txt:272c4eb7f422bebcc5d0f1da8bde47016b185ba8cb6ca06639bb2a3e88ea9bc5
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index dd2dd90..b12ed8a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
     rev: 4.0.1
     hooks:
       - id: flake8
-        args: ["--max-line-length", "88"]
+        args: ["--max-line-length", "88", "--extend-ignore", "E203"]
   - repo: https://github.com/myint/autoflake
     rev: v1.4
     hooks:
@@ -47,4 +47,5 @@ repos:
     rev: "v1.5.1"
     hooks:
       - id: mypy
+        additional_dependencies: [types-PyYAML==6.0.12.11]
         args: ["--check-untyped-defs", "--ignore-missing-imports"]
diff --git a/credentials.txt.secret b/credentials.txt.secret
index b7b1e45..2597a44 100644
Binary files a/credentials.txt.secret and b/credentials.txt.secret differ
diff --git a/knowledgehub/contribs/promptui/base.py b/knowledgehub/contribs/promptui/base.py
new file mode 100644
index 0000000..ac7c0f0
--- /dev/null
+++ b/knowledgehub/contribs/promptui/base.py
@@ -0,0 +1,20 @@
+import gradio as gr
+
+COMPONENTS_CLASS = {
+    "text": gr.components.Textbox,
+    "checkbox": gr.components.CheckboxGroup,
+    "dropdown": gr.components.Dropdown,
+    "file": gr.components.File,
+    "image": gr.components.Image,
+    "number": gr.components.Number,
+    "radio": gr.components.Radio,
+    "slider": gr.components.Slider,
+}
+SUPPORTED_COMPONENTS = set(COMPONENTS_CLASS.keys())
+DEFAULT_COMPONENT_BY_TYPES = {
+    "str": "text",
+    "bool": "checkbox",
+    "int": "number",
+    "float": "number",
+    "list": "dropdown",
+}
diff --git a/knowledgehub/contribs/promptui/config.py b/knowledgehub/contribs/promptui/config.py
index 7e38ade..a581c75 100644
--- a/knowledgehub/contribs/promptui/config.py
+++ b/knowledgehub/contribs/promptui/config.py
@@ -1 +1,132 @@
 """Get config from Pipeline"""
+import inspect
+from pathlib import Path
+from typing import Any, Dict, Optional, Type, Union
+
+import yaml
+
+from ...base import BaseComponent
+from .base import DEFAULT_COMPONENT_BY_TYPES
+
+
+def config_from_value(value: Any) -> dict:
+    """Get the config from a default value
+
+    Args:
+        value (Any): default value
+
+    Returns:
+        dict: config
+    """
+    component = DEFAULT_COMPONENT_BY_TYPES.get(type(value).__name__, "text")
+    return {
+        "component": component,
+        "params": {
+            "value": value,
+        },
+    }
+
+
+def handle_param(param: dict) -> dict:
+    """Convert a param definition into a promptui-compliant config
+
+    Supported Gradio UI components are (https://www.gradio.app/docs/components):
+    - CheckBoxGroup: list (multi select)
+    - DropDown: list (single select)
+    - File
+    - Image
+    - Number: int / float
+    - Radio: list (single select)
+    - Slider: int / float
+    - TextBox: str
+    """
+    params = {}
+    default = param.get("default", None)
+    if isinstance(default, str) and default.startswith("{{") and default.endswith("}}"):
+        default = None
+    if default is not None:
+        params["value"] = default
+
+    type_: str = type(default).__name__ if default is not None else ""
+    ui_component = DEFAULT_COMPONENT_BY_TYPES.get(type_, "text")
+
+    return {
+        "component": ui_component,
+        "params": params,
+    }
+
+
+def handle_node(node: dict) -> dict:
+    """Convert a node definition into a promptui-compliant config"""
+    config = {}
+    for name, param_def in node.get("params", {}).items():
+        if isinstance(param_def["default_callback"], str):
+            continue
+        config[name] = handle_param(param_def)
+    for name, node_def in node.get("nodes", {}).items():
+        if isinstance(node_def["default_callback"], str):
+            continue
+        for key, value in handle_node(node_def["default"]).items():
+            config[f"{name}.{key}"] = value
+        for key, value in node_def["default_kwargs"].items():
+            config[f"{name}.{key}"] = config_from_value(value)
+
+    return config
+
+
+def handle_input(pipeline: Union[BaseComponent, Type[BaseComponent]]) -> dict:
+    """Get the input components config from the pipeline's `run_raw` signature"""
+    if not hasattr(pipeline, "run_raw"):
+        return {}
+    signature = inspect.signature(pipeline.run_raw)
+    inputs: Dict[str, Dict] = {}
+    for name, param in signature.parameters.items():
+        if name in ["self", "args", "kwargs"]:
+            continue
+        input_def: Dict[str, Optional[Any]] = {"component": "text"}
+        default = param.default
+        if default is param.empty:
+            inputs[name] = input_def
+            continue
+
+        params = {}
+        params["value"] = default
+        # promptui renders every raw-mode input as a text box for now; defaulting
+        # here keeps `component` valid even when the default value is None
+        ui_component = "text"
+
+        input_def["component"] = ui_component
+        input_def["params"] = params
+
+        inputs[name] = input_def
+
+    return inputs
+
+
+def export_pipeline_to_config(
+    pipeline: Union[BaseComponent, Type[BaseComponent]],
+    path: Optional[str] = None,
+) -> dict:
+    """Export a pipeline to a promptui-compliant config dict"""
+    if inspect.isclass(pipeline):
+        pipeline = pipeline()
+
+    pipeline_def = pipeline.describe()
+    config = {
+        f"{pipeline.__module__}.{pipeline.__class__.__name__}": {
+            "params": handle_node(pipeline_def),
+            "inputs": handle_input(pipeline),
+            "outputs": [{"step": ".", "component": "text"}],
+        }
+    }
+    if path is not None:
+        old_config = config
+        if Path(path).is_file():
+            with open(path) as f:
+                old_config = yaml.safe_load(f)
+            old_config.update(config)
+        with open(path, "w") as f:
+            yaml.safe_dump(old_config, f)
+
+    return config
diff --git a/knowledgehub/contribs/promptui/ui.py b/knowledgehub/contribs/promptui/ui.py
index 912a033..2398a72 100644
--- a/knowledgehub/contribs/promptui/ui.py
+++ b/knowledgehub/contribs/promptui/ui.py
@@ -1,6 +1,151 @@
-"""Create UI from config file. Execute the UI from config file
+from typing import Union
 
-- Can do now: Log from stdout to UI
-- In the future, we can provide some hooks and callbacks to let developers better
-fine-tune the UI behavior.
-"""
+import gradio as gr
+import yaml
+from theflow.utils.modules import import_dotted_string
+
+from kotaemon.contribs.promptui.base import COMPONENTS_CLASS, SUPPORTED_COMPONENTS
+
+USAGE_INSTRUCTION = """In case of errors, you can:
+
+- Create a bug fix and open a PR at: https://github.com/Cinnamon/kotaemon
+- Ping any of @john @tadashi @ian @jacky in the Slack channel #llm-productization"""
+
+
+def get_component(component_def: dict) -> gr.components.Component:
+    """Get the Gradio component based on the component definition"""
+    component_cls = None
+
+    if "component" in component_def:
+        component = component_def["component"]
+        if component not in SUPPORTED_COMPONENTS:
+            raise ValueError(
+                f"Unsupported UI component: {component}. "
+                f"Must be one of {SUPPORTED_COMPONENTS}"
+            )
+
+        component_cls = COMPONENTS_CLASS[component]
+    else:
+        raise ValueError(
+            f"Cannot decide the component from {component_def}. "
" + "Please specify `component` with 1 of the following " + f"values: {SUPPORTED_COMPONENTS}" + ) + + return component_cls(**component_def.get("params", {})) + + +def construct_ui(config, func_run, func_export) -> gr.Blocks: + """Create UI from config file. Execute the UI from config file + + - Can do now: Log from stdout to UI + - In the future, we can provide some hooks and callbacks to let developers better + fine-tune the UI behavior. + """ + inputs, outputs, params = [], [], [] + for name, component_def in config.get("inputs", {}).items(): + if "params" not in component_def: + component_def["params"] = {} + component_def["params"]["interactive"] = True + component = get_component(component_def) + if hasattr(component, "label") and not component.label: # type: ignore + component.label = name # type: ignore + + inputs.append(component) + + for name, component_def in config.get("params", {}).items(): + if "params" not in component_def: + component_def["params"] = {} + component_def["params"]["interactive"] = True + component = get_component(component_def) + if hasattr(component, "label") and not component.label: # type: ignore + component.label = name # type: ignore + + params.append(component) + + for idx, component_def in enumerate(config.get("outputs", [])): + if "params" not in component_def: + component_def["params"] = {} + component_def["params"]["interactive"] = False + component = get_component(component_def) + if hasattr(component, "label") and not component.label: # type: ignore + component.label = f"Output {idx}" + + outputs.append(component) + + temp = gr.Tab + with gr.Blocks(analytics_enabled=False, title="Welcome to PromptUI") as demo: + with gr.Accordion(label="Usage", open=False): + gr.Markdown(USAGE_INSTRUCTION) + with gr.Row(): + run_btn = gr.Button("Run") + run_btn.click(func_run, inputs=inputs + params, outputs=outputs) + export_btn = gr.Button("Export") + export_btn.click(func_export, inputs=None, outputs=None) + with gr.Row(): + with gr.Column(): + with temp("Inputs"): + for component in inputs: + component.render() + with temp("Params"): + for component in params: + component.render() + with gr.Column(): + for component in outputs: + component.render() + + return demo + + +def build_pipeline_ui(config: dict, pipeline_def): + """Build a tab from config file""" + inputs_name = list(config.get("inputs", {}).keys()) + params_name = list(config.get("params", {}).keys()) + outputs_def = config.get("outputs", []) + + def run_func(*args): + inputs = { + name: value for name, value in zip(inputs_name, args[: len(inputs_name)]) + } + params = { + name: value for name, value in zip(params_name, args[len(inputs_name) :]) + } + pipeline = pipeline_def() + pipeline.set(params) + pipeline(**inputs) + if outputs_def: + outputs = [] + for output_def in outputs_def: + output = pipeline.last_run.logs(output_def["step"]) + if "item" in output_def: + output = output[output_def["item"]] + outputs.append(output) + return outputs + + # TODO: export_func is None for now + return construct_ui(config, run_func, None) + + +def build_from_dict(config: Union[str, dict]): + """Build a full UI from YAML config file""" + + if isinstance(config, str): + with open(config) as f: + config_dict: dict = yaml.safe_load(f) + elif isinstance(config, dict): + config_dict = config + else: + raise ValueError( + f"config must be either a yaml path or a dict, got {type(config)}" + ) + + demos = [] + for key, value in config_dict.items(): + pipeline_def = import_dotted_string(key, safe=False) + 
+        demos.append(build_pipeline_ui(value, pipeline_def))
+    if len(demos) == 1:
+        demo = demos[0]
+    else:
+        demo = gr.TabbedInterface(demos, list(config_dict.keys()))
+
+    return demo
diff --git a/knowledgehub/docstores/__init__.py b/knowledgehub/docstores/__init__.py
index 88f6829..bee4fc5 100644
--- a/knowledgehub/docstores/__init__.py
+++ b/knowledgehub/docstores/__init__.py
@@ -1,4 +1,4 @@
 from .base import BaseDocumentStore
-from .simple import InMemoryDocumentStore
+from .in_memory import InMemoryDocumentStore
 
 __all__ = ["BaseDocumentStore", "InMemoryDocumentStore"]
diff --git a/knowledgehub/docstores/simple.py b/knowledgehub/docstores/in_memory.py
similarity index 80%
rename from knowledgehub/docstores/simple.py
rename to knowledgehub/docstores/in_memory.py
index 7f812e8..577363e 100644
--- a/knowledgehub/docstores/simple.py
+++ b/knowledgehub/docstores/in_memory.py
@@ -10,7 +10,7 @@ class InMemoryDocumentStore(BaseDocumentStore):
     """Simple memory document store that store document in a dictionary"""
 
     def __init__(self):
-        self.store = {}
+        self._store = {}
 
     def add(
         self,
@@ -32,20 +32,20 @@ class InMemoryDocumentStore(BaseDocumentStore):
             docs = [docs]
 
         for doc_id, doc in zip(doc_ids, docs):
-            if doc_id in self.store and not exist_ok:
+            if doc_id in self._store and not exist_ok:
                 raise ValueError(f"Document with id {doc_id} already exist")
-            self.store[doc_id] = doc
+            self._store[doc_id] = doc
 
     def get(self, ids: Union[List[str], str]) -> List[Document]:
         """Get document by id"""
         if not isinstance(ids, list):
             ids = [ids]
 
-        return [self.store[doc_id] for doc_id in ids]
+        return [self._store[doc_id] for doc_id in ids]
 
     def get_all(self) -> dict:
         """Get all documents"""
-        return self.store
+        return self._store
 
     def delete(self, ids: Union[List[str], str]):
         """Delete document by id"""
@@ -53,11 +53,11 @@ class InMemoryDocumentStore(BaseDocumentStore):
             ids = [ids]
 
         for doc_id in ids:
-            del self.store[doc_id]
+            del self._store[doc_id]
 
     def save(self, path: Union[str, Path]):
         """Save document to path"""
-        store = {key: value.to_dict() for key, value in self.store.items()}
+        store = {key: value.to_dict() for key, value in self._store.items()}
         with open(path, "w") as f:
             json.dump(store, f)
 
@@ -65,4 +65,4 @@ class InMemoryDocumentStore(BaseDocumentStore):
         """Load document store from path"""
         with open(path) as f:
             store = json.load(f)
-        self.store = {key: Document.from_dict(value) for key, value in store.items()}
+        self._store = {key: Document.from_dict(value) for key, value in store.items()}
diff --git a/knowledgehub/documents/base.py b/knowledgehub/documents/base.py
index 29bbcec..30f540d 100644
--- a/knowledgehub/documents/base.py
+++ b/knowledgehub/documents/base.py
@@ -1,4 +1,5 @@
 from haystack.schema import Document as HaystackDocument
+from llama_index.bridge.pydantic import Field
 from llama_index.schema import Document as BaseDocument
 
 SAMPLE_TEXT = "A sample Document from kotaemon"
@@ -20,3 +21,17 @@ class Document(BaseDocument):
         metadata = self.metadata or {}
         text = self.text
         return HaystackDocument(content=text, meta=metadata)
+
+
+class RetrievedDocument(Document):
+    """Subclass of Document with retrieval-related information
+
+    Attributes:
+        score (float): score of the document (from 0.0 to 1.0)
+        retrieval_metadata (dict): metadata from the retrieval process, can be used
+            by different components in a retrieval pipeline to communicate with each
+            other
+    """
+
+    score: float = Field(default=0.0)
+    retrieval_metadata: dict = Field(default={})
diff --git a/knowledgehub/llms/completions/base.py b/knowledgehub/llms/completions/base.py
index 6cbba52..0b03d72 100644
--- a/knowledgehub/llms/completions/base.py
+++ b/knowledgehub/llms/completions/base.py
@@ -27,7 +27,7 @@ class LangchainLLM(LLM):
             self._kwargs[param] = params.pop(param)
         super().__init__(**params)
 
-    @Param.decorate()
+    @Param.decorate(no_cache=True)
     def agent(self):
         return self._lc_class(**self._kwargs)
 
diff --git a/knowledgehub/pipelines/indexing.py b/knowledgehub/pipelines/indexing.py
index a3b0c72..1935268 100644
--- a/knowledgehub/pipelines/indexing.py
+++ b/knowledgehub/pipelines/indexing.py
@@ -1,8 +1,10 @@
-from typing import List
+import uuid
+from typing import List, Optional
 
 from theflow import Node, Param
 
 from ..base import BaseComponent
+from ..docstores import BaseDocumentStore
 from ..documents.base import Document
 from ..embeddings import BaseEmbeddings
 from ..vectorstores import BaseVectorStore
@@ -18,21 +20,30 @@ class IndexVectorStoreFromDocumentPipeline(BaseComponent):
     """
 
     vector_store: Param[BaseVectorStore] = Param()
+    doc_store: Optional[BaseDocumentStore] = None
     embedding: Node[BaseEmbeddings] = Node()
 
-    # TODO: populate to document store as well when it's finished
+    # TODO: refer to llama_index's storage as well
 
     def run_raw(self, text: str) -> None:
-        self.vector_store.add([self.embedding(text)])
+        document = Document(text=text, id_=str(uuid.uuid4()))
+        self.run_batch_document([document])
 
     def run_batch_raw(self, text: List[str]) -> None:
-        self.vector_store.add(self.embedding(text))
+        documents = [Document(text=t, id_=str(uuid.uuid4())) for t in text]
+        self.run_batch_document(documents)
 
     def run_document(self, text: Document) -> None:
-        self.vector_store.add([self.embedding(text)])
+        self.run_batch_document([text])
 
     def run_batch_document(self, text: List[Document]) -> None:
-        self.vector_store.add(self.embedding(text))
+        embeddings = self.embedding(text)
+        self.vector_store.add(
+            embeddings=embeddings,
+            ids=[t.id_ for t in text],
+        )
+        if self.doc_store:
+            self.doc_store.add(text)
 
     def is_document(self, text) -> bool:
         if isinstance(text, Document):
diff --git a/knowledgehub/pipelines/retrieving.py b/knowledgehub/pipelines/retrieving.py
index 4ba43ba..64d4e6b 100644
--- a/knowledgehub/pipelines/retrieving.py
+++ b/knowledgehub/pipelines/retrieving.py
@@ -1,47 +1,87 @@
-from typing import List
+from abc import abstractmethod
+from typing import List, Optional
 
 from theflow import Node, Param
 
 from ..base import BaseComponent
-from ..documents.base import Document
+from ..docstores import BaseDocumentStore
+from ..documents.base import Document, RetrievedDocument
 from ..embeddings import BaseEmbeddings
 from ..vectorstores import BaseVectorStore
 
 
-class RetrieveDocumentFromVectorStorePipeline(BaseComponent):
+class BaseRetrieval(BaseComponent):
+    """Define the base interface of a retrieval pipeline"""
+
+    @abstractmethod
+    def run_raw(self, text: str, top_k: int = 1) -> List[RetrievedDocument]:
+        ...
+
+    @abstractmethod
+    def run_batch_raw(
+        self, text: List[str], top_k: int = 1
+    ) -> List[List[RetrievedDocument]]:
+        ...
+
+    @abstractmethod
+    def run_document(self, text: Document, top_k: int = 1) -> List[RetrievedDocument]:
+        ...
+
+    @abstractmethod
+    def run_batch_document(
+        self, text: List[Document], top_k: int = 1
+    ) -> List[List[RetrievedDocument]]:
+        ...
+
+
+class RetrieveDocumentFromVectorStorePipeline(BaseRetrieval):
     """Retrieve list of documents from vector store"""
 
     vector_store: Param[BaseVectorStore] = Param()
+    doc_store: Optional[BaseDocumentStore] = None
     embedding: Node[BaseEmbeddings] = Node()
 
-    # TODO: populate to document store as well when it's finished
     # TODO: refer to llama_index's storage as well
 
-    def run_raw(self, text: str) -> List[str]:
-        emb = self.embedding(text)
-        return self.vector_store.query(embedding=emb)[2]
+    def run_raw(self, text: str, top_k: int = 1) -> List[RetrievedDocument]:
+        return self.run_batch_raw([text], top_k=top_k)[0]
+
+    def run_batch_raw(
+        self, text: List[str], top_k: int = 1
+    ) -> List[List[RetrievedDocument]]:
+        if self.doc_store is None:
+            raise ValueError(
+                "doc_store is not provided. Please provide a doc_store to "
+                "retrieve the documents"
+            )
 
-    def run_batch_raw(self, text: List[str]) -> List[List[str]]:
         result = []
         for each_text in text:
             emb = self.embedding(each_text)
-            result.append(self.vector_store.query(embedding=emb)[2])
+            _, scores, ids = self.vector_store.query(embedding=emb, top_k=top_k)
+            docs = self.doc_store.get(ids)
+            each_result = [
+                RetrievedDocument(**doc.to_dict(), score=score)
+                for doc, score in zip(docs, scores)
+            ]
+            result.append(each_result)
         return result
 
-    def run_document(self, text: Document) -> List[str]:
-        return self.run_raw(text.text)
+    def run_document(self, text: Document, top_k: int = 1) -> List[RetrievedDocument]:
+        return self.run_raw(text.text, top_k)
 
-    def run_batch_document(self, text: List[Document]) -> List[List[str]]:
-        input_text = [each.text for each in text]
-        return self.run_batch_raw(input_text)
+    def run_batch_document(
+        self, text: List[Document], top_k: int = 1
+    ) -> List[List[RetrievedDocument]]:
+        return self.run_batch_raw(text=[t.text for t in text], top_k=top_k)
 
-    def is_document(self, text) -> bool:
+    def is_document(self, text, *args, **kwargs) -> bool:
         if isinstance(text, Document):
             return True
         elif isinstance(text, List) and isinstance(text[0], Document):
             return True
         return False
 
-    def is_batch(self, text) -> bool:
+    def is_batch(self, text, *args, **kwargs) -> bool:
         if isinstance(text, list):
             return True
         return False
diff --git a/knowledgehub/vectorstores/base.py b/knowledgehub/vectorstores/base.py
index ac66e6b..310c019 100644
--- a/knowledgehub/vectorstores/base.py
+++ b/knowledgehub/vectorstores/base.py
@@ -144,8 +144,8 @@ class LlamaIndexVectorStore(BaseVectorStore):
                 query_embedding=embedding,
                 similarity_top_k=top_k,
                 node_ids=ids,
+                **kwargs,
             ),
-            **kwargs,
         )
 
         embeddings = []
diff --git a/pytest.ini b/pytest.ini
index 1127b02..819c6b0 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,7 +3,7 @@ minversion = 7.4.0
 testpaths = tests
 addopts = -ra -q
 log_cli=true
-log_level=DEBUG
+log_level=WARNING
 log_format = %(asctime)s %(levelname)s %(message)s
 log_date_format = %Y-%m-%d %H:%M:%S
 log_file = logs/pytest-logs.txt
diff --git a/setup.py b/setup.py
index 91e9b67..819d363 100644
--- a/setup.py
+++ b/setup.py
@@ -34,6 +34,7 @@ setuptools.setup(
         "llama-index",
         "llama-hub",
         "nltk",
+        "gradio",
     ],
     extras_require={
         "dev": [
diff --git a/tests/test_indexing_retrieval.py b/tests/test_indexing_retrieval.py
index 1dc3377..c253c25 100644
--- a/tests/test_indexing_retrieval.py
+++ b/tests/test_indexing_retrieval.py
@@ -1,9 +1,11 @@
 import json
 from pathlib import Path
+from typing import cast
 
 import pytest
 from openai.api_resources.embedding import Embedding
 
+from kotaemon.docstores import InMemoryDocumentStore
 from kotaemon.documents.base import Document
 from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
 from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
@@ -21,6 +23,7 @@ def mock_openai_embedding(monkeypatch):
 
 def test_indexing(mock_openai_embedding, tmp_path):
     db = ChromaVectorStore(path=str(tmp_path))
+    doc_store = InMemoryDocumentStore()
     embedding = AzureOpenAIEmbeddings(
         model="text-embedding-ada-002",
         deployment="embedding-deployment",
@@ -29,15 +32,19 @@ def test_indexing(mock_openai_embedding, tmp_path):
     )
 
     pipeline = IndexVectorStoreFromDocumentPipeline(
-        vector_store=db, embedding=embedding
+        vector_store=db, embedding=embedding, doc_store=doc_store
     )
+    pipeline.doc_store = cast(InMemoryDocumentStore, pipeline.doc_store)
     assert pipeline.vector_store._collection.count() == 0, "Expected empty collection"
+    assert len(pipeline.doc_store._store) == 0, "Expected empty doc store"
     pipeline(text=Document(text="Hello world"))
     assert pipeline.vector_store._collection.count() == 1, "Index 1 item"
+    assert len(pipeline.doc_store._store) == 1, "Expected 1 document"
 
 
 def test_retrieving(mock_openai_embedding, tmp_path):
     db = ChromaVectorStore(path=str(tmp_path))
+    doc_store = InMemoryDocumentStore()
     embedding = AzureOpenAIEmbeddings(
         model="text-embedding-ada-002",
         deployment="embedding-deployment",
@@ -46,14 +53,14 @@ def test_retrieving(mock_openai_embedding, tmp_path):
     )
 
     index_pipeline = IndexVectorStoreFromDocumentPipeline(
-        vector_store=db, embedding=embedding
+        vector_store=db, embedding=embedding, doc_store=doc_store
     )
     retrieval_pipeline = RetrieveDocumentFromVectorStorePipeline(
-        vector_store=db, embedding=embedding
+        vector_store=db, doc_store=doc_store, embedding=embedding
     )
 
     index_pipeline(text=Document(text="Hello world"))
     output = retrieval_pipeline(text=["Hello world", "Hello world"])
-    assert len(output) == 2, "Expected 2 results"
-    assert output[0] == output[1], "Expected identical results"
+    assert len(output) == 2, "Expect 2 results"
+    assert output[0] == output[1], "Expect identical results"
diff --git a/tests/test_promptui.py b/tests/test_promptui.py
new file mode 100644
index 0000000..74468e9
--- /dev/null
+++ b/tests/test_promptui.py
@@ -0,0 +1,86 @@
+import pytest
+
+from kotaemon.contribs.promptui.config import export_pipeline_to_config
+from kotaemon.contribs.promptui.ui import build_from_dict
+
+
+@pytest.fixture()
+def simple_pipeline_cls(tmp_path):
+    """Create a pipeline class that can be used in the tests"""
+    from typing import List
+
+    from theflow import Node
+
+    from kotaemon.base import BaseComponent
+    from kotaemon.embeddings import AzureOpenAIEmbeddings
+    from kotaemon.llms.completions.openai import AzureOpenAI
+    from kotaemon.pipelines.retrieving import (
+        RetrieveDocumentFromVectorStorePipeline,
+    )
+    from kotaemon.vectorstores import ChromaVectorStore
+
+    class Pipeline(BaseComponent):
+        vectorstore_path: str = str(tmp_path)
+        llm: Node[AzureOpenAI] = Node(
+            default=AzureOpenAI,
+            default_kwargs={
+                "openai_api_base": "https://test.openai.azure.com/",
+                "openai_api_key": "some-key",
+                "openai_api_version": "2023-03-15-preview",
+                "deployment_name": "gpt35turbo",
+                "temperature": 0,
+                "request_timeout": 60,
+            },
+        )
+
+        @Node.decorate(depends_on=["vectorstore_path"])
+        def retrieving_pipeline(self):
+            vector_store = ChromaVectorStore(self.vectorstore_path)
+            embedding = AzureOpenAIEmbeddings(
+                model="text-embedding-ada-002",
+                deployment="embedding-deployment",
+                openai_api_base="https://test.openai.azure.com/",
+                openai_api_key="some-key",
+            )
+
+            return RetrieveDocumentFromVectorStorePipeline(
+                vector_store=vector_store, embedding=embedding
+            )
+
+        def run_raw(self, text: str) -> str:
+            matched_texts: List[str] = self.retrieving_pipeline(text)
+            return self.llm("\n".join(matched_texts)).text[0]
+
+    return Pipeline
+
+
+Pipeline = simple_pipeline_cls
+
+
+class TestPromptConfig:
+    def test_export_prompt_config(self, simple_pipeline_cls):
+        """Test if the prompt config is exported correctly"""
+        pipeline = simple_pipeline_cls()
+        config_dict = export_pipeline_to_config(pipeline)
+        config = list(config_dict.values())[0]
+
+        assert "inputs" in config, "inputs should be in config"
+        assert "text" in config["inputs"], "inputs should have a config for 'text'"
+
+        assert "params" in config, "params should be in config"
+        assert "vectorstore_path" in config["params"]
+        assert "llm.deployment_name" in config["params"]
+        assert "llm.openai_api_base" in config["params"]
+        assert "llm.openai_api_key" in config["params"]
+        assert "llm.openai_api_version" in config["params"]
+        assert "llm.request_timeout" in config["params"]
+        assert "llm.temperature" in config["params"]
+
+
+class TestPromptUI:
+    def test_uigeneration(self, simple_pipeline_cls):
+        """Test that the Gradio UI can be built without any problem"""
+        pipeline = simple_pipeline_cls()
+        config = export_pipeline_to_config(pipeline)
+
+        build_from_dict(config)
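For reference, a minimal end-to-end sketch of the promptui flow added above, based only on the functions in this diff. Assumptions: `Pipeline` is any `BaseComponent` subclass (such as the test fixture's), and the `promptui.yml` path and the final `.launch()` call are illustrative:

# Export a pipeline's params/inputs to a promptui YAML config,
# then rebuild the Gradio UI from that config and serve it.
from kotaemon.contribs.promptui.config import export_pipeline_to_config
from kotaemon.contribs.promptui.ui import build_from_dict

export_pipeline_to_config(Pipeline, path="promptui.yml")  # Pipeline assumed defined
demo = build_from_dict("promptui.yml")  # gr.Blocks, or gr.TabbedInterface for several pipelines
demo.launch()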