Upgrade the declarative pipeline for a cleaner interface (#51)
This commit is contained in:
committed by
GitHub
parent
aab982ddc4
commit
9035e25666
@@ -13,3 +13,17 @@ def mock_google_search(monkeypatch):
|
||||
)
|
||||
|
||||
monkeypatch.setattr(googlesearch, "search", result)
|
||||
|
||||
|
||||
def if_haystack_not_installed():
    """Report whether the ``haystack`` package fails to import.

    Returns:
        bool: ``True`` when ``import haystack`` raises ``ImportError``,
        ``False`` when the import succeeds.
    """
    missing = False
    try:
        # Imported only to probe availability; the module itself is unused.
        import haystack  # noqa: F401
    except ImportError:
        missing = True
    return missing
|
||||
|
||||
|
||||
# Decorator for tests that need haystack: the condition is evaluated once,
# when this module is imported, and marked tests are skipped when it is True.
skip_when_haystack_not_installed = pytest.mark.skipif(
    if_haystack_not_installed(),
    reason="Haystack is not installed",
)
|
||||
|
@@ -1,7 +1,7 @@
|
||||
import tempfile
|
||||
from typing import List
|
||||
|
||||
from theflow import Node
|
||||
from theflow.utils.modules import ObjectInitDeclaration as _
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from kotaemon.embeddings import AzureOpenAIEmbeddings
|
||||
@@ -11,33 +11,27 @@ from kotaemon.vectorstores import ChromaVectorStore
|
||||
|
||||
|
||||
class Pipeline(BaseComponent):
|
||||
vectorstore_path: str = str(tempfile.mkdtemp())
|
||||
llm: Node[AzureOpenAI] = Node(
|
||||
default=AzureOpenAI,
|
||||
default_kwargs={
|
||||
"openai_api_base": "https://test.openai.azure.com/",
|
||||
"openai_api_key": "some-key",
|
||||
"openai_api_version": "2023-03-15-preview",
|
||||
"deployment_name": "gpt35turbo",
|
||||
"temperature": 0,
|
||||
"request_timeout": 60,
|
||||
},
|
||||
llm: AzureOpenAI = AzureOpenAI.withx(
|
||||
openai_api_base="https://test.openai.azure.com/",
|
||||
openai_api_key="some-key",
|
||||
openai_api_version="2023-03-15-preview",
|
||||
deployment_name="gpt35turbo",
|
||||
temperature=0,
|
||||
request_timeout=60,
|
||||
)
|
||||
|
||||
@Node.decorate(depends_on=["vectorstore_path"])
|
||||
def retrieving_pipeline(self):
|
||||
vector_store = ChromaVectorStore(self.vectorstore_path)
|
||||
embedding = AzureOpenAIEmbeddings(
|
||||
model="text-embedding-ada-002",
|
||||
deployment="embedding-deployment",
|
||||
openai_api_base="https://test.openai.azure.com/",
|
||||
openai_api_key="some-key",
|
||||
)
|
||||
|
||||
return RetrieveDocumentFromVectorStorePipeline(
|
||||
vector_store=vector_store, embedding=embedding
|
||||
retrieving_pipeline: RetrieveDocumentFromVectorStorePipeline = (
|
||||
RetrieveDocumentFromVectorStorePipeline.withx(
|
||||
vector_store=_(ChromaVectorStore).withx(path=str(tempfile.mkdtemp())),
|
||||
embedding=AzureOpenAIEmbeddings.withx(
|
||||
model="text-embedding-ada-002",
|
||||
deployment="embedding-deployment",
|
||||
openai_api_base="https://test.openai.azure.com/",
|
||||
openai_api_key="some-key",
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
def run_raw(self, text: str) -> str:
|
||||
matched_texts: List[str] = self.retrieving_pipeline(text)
|
||||
return self.llm("\n".join(matched_texts)).text[0]
|
||||
return self.llm("\n".join(matched_texts)).text
|
||||
|
@@ -1,7 +1,7 @@
|
||||
from haystack.schema import Document as HaystackDocument
|
||||
|
||||
from kotaemon.documents.base import Document, RetrievedDocument
|
||||
|
||||
from .conftest import skip_when_haystack_not_installed
|
||||
|
||||
|
||||
def test_document_constructor_with_builtin_types():
|
||||
for value in ["str", 1, {}, set(), [], tuple, None]:
|
||||
@@ -19,7 +19,10 @@ def test_document_constructor_with_document():
|
||||
assert doc2.content == doc1.content
|
||||
|
||||
|
||||
@skip_when_haystack_not_installed
|
||||
def test_document_to_haystack_format():
|
||||
from haystack.schema import Document as HaystackDocument
|
||||
|
||||
text = "Sample text"
|
||||
metadata = {"filename": "sample.txt"}
|
||||
doc = Document(text, metadata=metadata)
|
||||
|
@@ -16,7 +16,6 @@ class TestPromptConfig:
|
||||
assert "text" in config["inputs"], "inputs should have config"
|
||||
|
||||
assert "params" in config, "params should be in config"
|
||||
assert "vectorstore_path" in config["params"]
|
||||
assert "llm.deployment_name" in config["params"]
|
||||
assert "llm.openai_api_base" in config["params"]
|
||||
assert "llm.openai_api_key" in config["params"]
|
||||
|
@@ -42,8 +42,9 @@ def mock_openai_embedding(monkeypatch):
|
||||
)
|
||||
def test_ingest_pipeline(patch, mock_openai_embedding, tmp_path):
|
||||
indexing_pipeline = ReaderIndexingPipeline(
|
||||
storage=tmp_path, openai_api_key="some-key"
|
||||
storage_path=tmp_path,
|
||||
)
|
||||
indexing_pipeline.embedding.openai_api_key = "some-key"
|
||||
input_file_path = Path(__file__).parent / "resources/dummy.pdf"
|
||||
|
||||
# call ingestion pipeline
|
||||
|
@@ -3,7 +3,7 @@ from pathlib import Path
|
||||
from langchain.schema import Document as LangchainDocument
|
||||
from llama_index.node_parser import SimpleNodeParser
|
||||
|
||||
from kotaemon.documents.base import Document, HaystackDocument
|
||||
from kotaemon.documents.base import Document
|
||||
from kotaemon.loaders import AutoReader
|
||||
|
||||
|
||||
@@ -19,10 +19,6 @@ def test_pdf_reader():
|
||||
assert isinstance(first_doc, Document)
|
||||
assert first_doc.text.lower().replace(" ", "") == "dummypdffile"
|
||||
|
||||
# check conversion output
|
||||
haystack_doc = first_doc.to_haystack_format()
|
||||
assert isinstance(haystack_doc, HaystackDocument)
|
||||
|
||||
langchain_doc = first_doc.to_langchain_format()
|
||||
assert isinstance(langchain_doc, LangchainDocument)
|
||||
|
||||
|
@@ -3,6 +3,8 @@ import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from .conftest import skip_when_haystack_not_installed
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def clean_artifacts_for_telemetry():
|
||||
@@ -26,6 +28,7 @@ def clean_artifacts_for_telemetry():
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("clean_artifacts_for_telemetry")
|
||||
@skip_when_haystack_not_installed
|
||||
def test_disable_telemetry_import_haystack_first():
|
||||
"""Test that telemetry is disabled when kotaemon lib is initiated after"""
|
||||
import os
|
||||
@@ -42,6 +45,7 @@ def test_disable_telemetry_import_haystack_first():
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("clean_artifacts_for_telemetry")
|
||||
@skip_when_haystack_not_installed
|
||||
def test_disable_telemetry_import_haystack_after_kotaemon():
|
||||
"""Test that telemetry is disabled when kotaemon lib is initiated before"""
|
||||
import os
|
||||
|
Reference in New Issue
Block a user