[AUR-408] Export logs to Excel (#23)

This CL implements:

- The logic to export logs to Excel.
- Routing of the export logic in the UI.
- A demonstration of this functionality in the `./examples/promptui` project (see the usage sketch below).
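
As a rough usage sketch of the new export path (a minimal example mirroring the TestExport test added in this diff; the file paths "config.yaml" and "exported.xlsx" are placeholders, and the Pipeline class is the one added in tests/simple_pipeline.py below, so the import path may differ in your setup):

from pathlib import Path

import yaml
from theflow.storage import storage

from kotaemon.contribs.promptui.config import export_pipeline_to_config
from kotaemon.contribs.promptui.export import export_from_dict

from tests.simple_pipeline import Pipeline  # added in this commit; import path is illustrative

# Build the promptui config for the pipeline and make sure its result store exists,
# exactly as the test below does.
pipeline = Pipeline()
Path(storage.url(pipeline.config.store_result)).mkdir(parents=True, exist_ok=True)

config_dict = export_pipeline_to_config(pipeline)
pipeline_name = list(config_dict.keys())[0]

# Declare which logged inputs/outputs should land in which Excel sheet.
config_dict[pipeline_name]["logs"] = {
    "sheet1": {
        "inputs": [{"name": "text", "step": ".", "variable": "text"}],
        "outputs": [{"name": "answer", "step": "."}],
    },
}

# "config.yaml" and "exported.xlsx" are placeholder paths.
with open("config.yaml", "w") as f:
    yaml.safe_dump(config_dict, f)

export_from_dict(
    config="config.yaml",
    pipeline=pipeline_name,
    output_path="exported.xlsx",
)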
Author: Nguyen Trung Duc (john)
Date: 2023-09-25 17:20:03 +07:00
Committer: GitHub
Parent: 08b6e5d3fb
Commit: 4f189dc931
5 changed files with 265 additions and 64 deletions

tests/simple_pipeline.py (new file, 43 lines)

@@ -0,0 +1,43 @@
import tempfile
from typing import List

from theflow import Node

from kotaemon.base import BaseComponent
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.llms.completions.openai import AzureOpenAI
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.vectorstores import ChromaVectorStore


class Pipeline(BaseComponent):
    vectorstore_path: str = str(tempfile.mkdtemp())
    llm: Node[AzureOpenAI] = Node(
        default=AzureOpenAI,
        default_kwargs={
            "openai_api_base": "https://test.openai.azure.com/",
            "openai_api_key": "some-key",
            "openai_api_version": "2023-03-15-preview",
            "deployment_name": "gpt35turbo",
            "temperature": 0,
            "request_timeout": 60,
        },
    )

    @Node.decorate(depends_on=["vectorstore_path"])
    def retrieving_pipeline(self):
        vector_store = ChromaVectorStore(self.vectorstore_path)
        embedding = AzureOpenAIEmbeddings(
            model="text-embedding-ada-002",
            deployment="embedding-deployment",
            openai_api_base="https://test.openai.azure.com/",
            openai_api_key="some-key",
        )
        return RetrieveDocumentFromVectorStorePipeline(
            vector_store=vector_store, embedding=embedding
        )

    def run_raw(self, text: str) -> str:
        matched_texts: List[str] = self.retrieving_pipeline(text)
        return self.llm("\n".join(matched_texts)).text[0]


@@ -1,66 +1,14 @@
 import pytest
 
 from kotaemon.contribs.promptui.config import export_pipeline_to_config
+from kotaemon.contribs.promptui.export import export_from_dict
 from kotaemon.contribs.promptui.ui import build_from_dict
 
-@pytest.fixture()
-def simple_pipeline_cls(tmp_path):
-    """Create a pipeline class that can be used"""
-    from typing import List
-
-    from theflow import Node
-
-    from kotaemon.base import BaseComponent
-    from kotaemon.embeddings import AzureOpenAIEmbeddings
-    from kotaemon.llms.completions.openai import AzureOpenAI
-    from kotaemon.pipelines.retrieving import (
-        RetrieveDocumentFromVectorStorePipeline,
-    )
-    from kotaemon.vectorstores import ChromaVectorStore
-
-    class Pipeline(BaseComponent):
-        vectorstore_path: str = str(tmp_path)
-        llm: Node[AzureOpenAI] = Node(
-            default=AzureOpenAI,
-            default_kwargs={
-                "openai_api_base": "https://test.openai.azure.com/",
-                "openai_api_key": "some-key",
-                "openai_api_version": "2023-03-15-preview",
-                "deployment_name": "gpt35turbo",
-                "temperature": 0,
-                "request_timeout": 60,
-            },
-        )
-
-        @Node.decorate(depends_on=["vectorstore_path"])
-        def retrieving_pipeline(self):
-            vector_store = ChromaVectorStore(self.vectorstore_path)
-            embedding = AzureOpenAIEmbeddings(
-                model="text-embedding-ada-002",
-                deployment="embedding-deployment",
-                openai_api_base="https://test.openai.azure.com/",
-                openai_api_key="some-key",
-            )
-            return RetrieveDocumentFromVectorStorePipeline(
-                vector_store=vector_store, embedding=embedding
-            )
-
-        def run_raw(self, text: str) -> str:
-            matched_texts: List[str] = self.retrieving_pipeline(text)
-            return self.llm("\n".join(matched_texts)).text[0]
-
-    return Pipeline
-
-Pipeline = simple_pipeline_cls
+from .simple_pipeline import Pipeline
 
 
 class TestPromptConfig:
-    def test_export_prompt_config(self, simple_pipeline_cls):
+    def test_export_prompt_config(self):
         """Test if the prompt config is exported correctly"""
-        pipeline = simple_pipeline_cls()
+        pipeline = Pipeline()
         config_dict = export_pipeline_to_config(pipeline)
         config = list(config_dict.values())[0]
@@ -78,9 +26,42 @@ class TestPromptConfig:
 class TestPromptUI:
-    def test_uigeneration(self, simple_pipeline_cls):
+    def test_uigeneration(self):
         """Test if the gradio UI is exposed without any problem"""
-        pipeline = simple_pipeline_cls()
+        pipeline = Pipeline()
         config = export_pipeline_to_config(pipeline)
         build_from_dict(config)
+
+
+class TestExport:
+    def test_export(self, tmp_path):
+        """Test if the export functionality works without error"""
+        from pathlib import Path
+
+        import yaml
+        from theflow.storage import storage
+
+        config_path = tmp_path / "config.yaml"
+        pipeline = Pipeline()
+        Path(storage.url(pipeline.config.store_result)).mkdir(
+            parents=True, exist_ok=True
+        )
+
+        config_dict = export_pipeline_to_config(pipeline)
+        pipeline_name = list(config_dict.keys())[0]
+        config_dict[pipeline_name]["logs"] = {
+            "sheet1": {
+                "inputs": [{"name": "text", "step": ".", "variable": "text"}],
+                "outputs": [{"name": "answer", "step": "."}],
+            },
+        }
+
+        with open(config_path, "w") as f:
+            yaml.safe_dump(config_dict, f)
+
+        export_from_dict(
+            config=str(config_path),
+            pipeline=pipeline_name,
+            output_path=str(tmp_path / "exported.xlsx"),
+        )
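
Not part of this diff: a quick way to sanity-check the exported workbook, assuming openpyxl is installed. Whether the sheet is literally named "sheet1" depends on how the exporter maps the "logs" config, so treat the sheet lookup as an assumption.

from openpyxl import load_workbook

# "exported.xlsx" is the output path used in the test above; the sheet name is
# assumed to follow the "logs" mapping in the config ("sheet1" in this test).
wb = load_workbook("exported.xlsx")
ws = wb["sheet1"]
for row in ws.iter_rows(values_only=True):
    print(row)  # logged inputs/outputs, e.g. the "text" input and "answer" output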