fix: re-enable tests and fix legacy test interface (#208)
* fix: re-enable tests and fix legacy test interface
* fix: skip llamacpp based on installed status
* fix: minor fix
This commit is contained in:
parent
92f6b8e1bf
commit
76f2652d2a
4
.github/workflows/unit-test.yaml
vendored
4
.github/workflows/unit-test.yaml
vendored
|
@ -11,9 +11,9 @@ env:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
unit-test:
|
unit-test:
|
||||||
if: false # temporary disable this job due to legacy interface
|
# if: false # temporary disable this job due to legacy interface
|
||||||
#TODO: enable this job after the new interface is ready
|
#TODO: enable this job after the new interface is ready
|
||||||
# if: ${{ !cancelled() }}
|
if: ${{ !cancelled() }}
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
timeout-minutes: 20
|
timeout-minutes: 20
|
||||||
defaults:
|
defaults:
|
||||||
|
|
|
@ -3,6 +3,7 @@ from typing import Type
|
||||||
|
|
||||||
from decouple import config
|
from decouple import config
|
||||||
from llama_index.core.readers.base import BaseReader
|
from llama_index.core.readers.base import BaseReader
|
||||||
|
from llama_index.readers.file import PDFReader
|
||||||
from theflow.settings import settings as flowsettings
|
from theflow.settings import settings as flowsettings
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent, Document, Param
|
from kotaemon.base import BaseComponent, Document, Param
|
||||||
|
@ -91,7 +92,7 @@ class DocumentIngestor(BaseComponent):
|
||||||
file_extractors[ext] = cls()
|
file_extractors[ext] = cls()
|
||||||
|
|
||||||
if self.pdf_mode == "normal":
|
if self.pdf_mode == "normal":
|
||||||
pass # use default loader of llama-index which is pypdf
|
file_extractors[".pdf"] = PDFReader()
|
||||||
elif self.pdf_mode == "ocr":
|
elif self.pdf_mode == "ocr":
|
||||||
file_extractors[".pdf"] = OCRReader()
|
file_extractors[".pdf"] = OCRReader()
|
||||||
elif self.pdf_mode == "multimodal":
|
elif self.pdf_mode == "multimodal":
|
||||||
|
|
|
@ -24,6 +24,62 @@ def if_haystack_not_installed():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def if_sentence_bert_not_installed():
|
||||||
|
try:
|
||||||
|
import sentence_transformers # noqa: F401
|
||||||
|
except ImportError:
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def if_sentence_fastembed_not_installed():
|
||||||
|
try:
|
||||||
|
import fastembed # noqa: F401
|
||||||
|
except ImportError:
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def if_unstructured_not_installed():
|
||||||
|
try:
|
||||||
|
import unstructured # noqa: F401
|
||||||
|
except ImportError:
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def if_llama_cpp_not_installed():
|
||||||
|
try:
|
||||||
|
import llama_cpp # noqa: F401
|
||||||
|
except ImportError:
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
skip_when_haystack_not_installed = pytest.mark.skipif(
|
skip_when_haystack_not_installed = pytest.mark.skipif(
|
||||||
if_haystack_not_installed(), reason="Haystack is not installed"
|
if_haystack_not_installed(), reason="Haystack is not installed"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
skip_when_sentence_bert_not_installed = pytest.mark.skipif(
|
||||||
|
if_sentence_bert_not_installed(), reason="SBert is not installed"
|
||||||
|
)
|
||||||
|
|
||||||
|
skip_when_fastembed_not_installed = pytest.mark.skipif(
|
||||||
|
if_sentence_fastembed_not_installed(), reason="fastembed is not installed"
|
||||||
|
)
|
||||||
|
|
||||||
|
skip_when_unstructured_not_installed = pytest.mark.skipif(
|
||||||
|
if_unstructured_not_installed(), reason="unstructured is not installed"
|
||||||
|
)
|
||||||
|
|
||||||
|
skip_openai_lc_wrapper_test = pytest.mark.skipif(
|
||||||
|
True, reason="OpenAI LC wrapper test is skipped"
|
||||||
|
)
|
||||||
|
|
||||||
|
skip_llama_cpp_not_installed = pytest.mark.skipif(
|
||||||
|
if_llama_cpp_not_installed(), reason="llama_cpp is not installed"
|
||||||
|
)
|
||||||
|
|
|
@ -13,7 +13,9 @@ from kotaemon.agents import (
|
||||||
RewooAgent,
|
RewooAgent,
|
||||||
WikipediaTool,
|
WikipediaTool,
|
||||||
)
|
)
|
||||||
from kotaemon.llms import LCAzureChatOpenAI
|
from kotaemon.llms import AzureChatOpenAI
|
||||||
|
|
||||||
|
from .conftest import skip_openai_lc_wrapper_test
|
||||||
|
|
||||||
FINAL_RESPONSE_TEXT = "Final Answer: Hello Cinnamon AI!"
|
FINAL_RESPONSE_TEXT = "Final Answer: Hello Cinnamon AI!"
|
||||||
REWOO_VALID_PLAN = (
|
REWOO_VALID_PLAN = (
|
||||||
|
@ -112,12 +114,11 @@ _openai_chat_completion_responses_react_langchain_tool = [
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def llm():
|
def llm():
|
||||||
return LCAzureChatOpenAI(
|
return AzureChatOpenAI(
|
||||||
azure_endpoint="https://dummy.openai.azure.com/",
|
api_key="dummy",
|
||||||
openai_api_key="dummy",
|
api_version="2024-05-01-preview",
|
||||||
openai_api_version="2023-03-15-preview",
|
azure_deployment="gpt-4o",
|
||||||
deployment_name="dummy-q2",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
temperature=0,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -175,6 +176,7 @@ def test_react_agent(openai_completion, llm, mock_google_search):
|
||||||
assert response.text == FINAL_RESPONSE_TEXT
|
assert response.text == FINAL_RESPONSE_TEXT
|
||||||
|
|
||||||
|
|
||||||
|
@skip_openai_lc_wrapper_test
|
||||||
@patch(
|
@patch(
|
||||||
"openai.resources.chat.completions.Completions.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=_openai_chat_completion_responses_react,
|
side_effect=_openai_chat_completion_responses_react,
|
||||||
|
@ -199,6 +201,7 @@ def test_react_agent_langchain(openai_completion, llm, mock_google_search):
|
||||||
assert response
|
assert response
|
||||||
|
|
||||||
|
|
||||||
|
@skip_openai_lc_wrapper_test
|
||||||
@patch(
|
@patch(
|
||||||
"openai.resources.chat.completions.Completions.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=_openai_chat_completion_responses_react,
|
side_effect=_openai_chat_completion_responses_react,
|
||||||
|
|
|
@ -4,10 +4,10 @@ import pytest
|
||||||
from openai.types.chat.chat_completion import ChatCompletion
|
from openai.types.chat.chat_completion import ChatCompletion
|
||||||
|
|
||||||
from kotaemon.llms import (
|
from kotaemon.llms import (
|
||||||
|
AzureChatOpenAI,
|
||||||
BasePromptComponent,
|
BasePromptComponent,
|
||||||
GatedBranchingPipeline,
|
GatedBranchingPipeline,
|
||||||
GatedLinearPipeline,
|
GatedLinearPipeline,
|
||||||
LCAzureChatOpenAI,
|
|
||||||
SimpleBranchingPipeline,
|
SimpleBranchingPipeline,
|
||||||
SimpleLinearPipeline,
|
SimpleLinearPipeline,
|
||||||
)
|
)
|
||||||
|
@ -40,12 +40,11 @@ _openai_chat_completion_response = ChatCompletion.parse_obj(
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def mock_llm():
|
def mock_llm():
|
||||||
return LCAzureChatOpenAI(
|
return AzureChatOpenAI(
|
||||||
azure_endpoint="OPENAI_API_BASE",
|
api_key="dummy",
|
||||||
openai_api_key="OPENAI_API_KEY",
|
api_version="2024-05-01-preview",
|
||||||
openai_api_version="OPENAI_API_VERSION",
|
azure_deployment="gpt-4o",
|
||||||
deployment_name="dummy-q2-gpt35",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
temperature=0,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ from unittest.mock import patch
|
||||||
|
|
||||||
from openai.types.chat.chat_completion import ChatCompletion
|
from openai.types.chat.chat_completion import ChatCompletion
|
||||||
|
|
||||||
from kotaemon.llms import LCAzureChatOpenAI
|
from kotaemon.llms import AzureChatOpenAI
|
||||||
from kotaemon.llms.cot import ManualSequentialChainOfThought, Thought
|
from kotaemon.llms.cot import ManualSequentialChainOfThought, Thought
|
||||||
|
|
||||||
_openai_chat_completion_response = [
|
_openai_chat_completion_response = [
|
||||||
|
@ -38,12 +38,11 @@ _openai_chat_completion_response = [
|
||||||
side_effect=_openai_chat_completion_response,
|
side_effect=_openai_chat_completion_response,
|
||||||
)
|
)
|
||||||
def test_cot_plus_operator(openai_completion):
|
def test_cot_plus_operator(openai_completion):
|
||||||
llm = LCAzureChatOpenAI(
|
llm = AzureChatOpenAI(
|
||||||
azure_endpoint="https://dummy.openai.azure.com/",
|
api_key="dummy",
|
||||||
openai_api_key="dummy",
|
api_version="2024-05-01-preview",
|
||||||
openai_api_version="2023-03-15-preview",
|
azure_deployment="gpt-4o",
|
||||||
deployment_name="dummy-q2",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
temperature=0,
|
|
||||||
)
|
)
|
||||||
thought1 = Thought(
|
thought1 = Thought(
|
||||||
prompt="Word {word} in {language} is ",
|
prompt="Word {word} in {language} is ",
|
||||||
|
@ -70,12 +69,11 @@ def test_cot_plus_operator(openai_completion):
|
||||||
side_effect=_openai_chat_completion_response,
|
side_effect=_openai_chat_completion_response,
|
||||||
)
|
)
|
||||||
def test_cot_manual(openai_completion):
|
def test_cot_manual(openai_completion):
|
||||||
llm = LCAzureChatOpenAI(
|
llm = AzureChatOpenAI(
|
||||||
azure_endpoint="https://dummy.openai.azure.com/",
|
api_key="dummy",
|
||||||
openai_api_key="dummy",
|
api_version="2024-05-01-preview",
|
||||||
openai_api_version="2023-03-15-preview",
|
azure_deployment="gpt-4o",
|
||||||
deployment_name="dummy-q2",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
temperature=0,
|
|
||||||
)
|
)
|
||||||
thought1 = Thought(
|
thought1 = Thought(
|
||||||
prompt="Word {word} in {language} is ",
|
prompt="Word {word} in {language} is ",
|
||||||
|
@ -100,12 +98,11 @@ def test_cot_manual(openai_completion):
|
||||||
side_effect=_openai_chat_completion_response,
|
side_effect=_openai_chat_completion_response,
|
||||||
)
|
)
|
||||||
def test_cot_with_termination_callback(openai_completion):
|
def test_cot_with_termination_callback(openai_completion):
|
||||||
llm = LCAzureChatOpenAI(
|
llm = AzureChatOpenAI(
|
||||||
azure_endpoint="https://dummy.openai.azure.com/",
|
api_key="dummy",
|
||||||
openai_api_key="dummy",
|
api_version="2024-05-01-preview",
|
||||||
openai_api_version="2023-03-15-preview",
|
azure_deployment="gpt-4o",
|
||||||
deployment_name="dummy-q2",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
temperature=0,
|
|
||||||
)
|
)
|
||||||
thought1 = Thought(
|
thought1 = Thought(
|
||||||
prompt="Word {word} in {language} is ",
|
prompt="Word {word} in {language} is ",
|
||||||
|
|
|
@ -8,12 +8,16 @@ from kotaemon.base import Document
|
||||||
from kotaemon.embeddings import (
|
from kotaemon.embeddings import (
|
||||||
AzureOpenAIEmbeddings,
|
AzureOpenAIEmbeddings,
|
||||||
FastEmbedEmbeddings,
|
FastEmbedEmbeddings,
|
||||||
LCAzureOpenAIEmbeddings,
|
|
||||||
LCCohereEmbeddings,
|
LCCohereEmbeddings,
|
||||||
LCHuggingFaceEmbeddings,
|
LCHuggingFaceEmbeddings,
|
||||||
OpenAIEmbeddings,
|
OpenAIEmbeddings,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from .conftest import (
|
||||||
|
skip_when_fastembed_not_installed,
|
||||||
|
skip_when_sentence_bert_not_installed,
|
||||||
|
)
|
||||||
|
|
||||||
with open(Path(__file__).parent / "resources" / "embedding_openai_batch.json") as f:
|
with open(Path(__file__).parent / "resources" / "embedding_openai_batch.json") as f:
|
||||||
openai_embedding_batch = CreateEmbeddingResponse.model_validate(json.load(f))
|
openai_embedding_batch = CreateEmbeddingResponse.model_validate(json.load(f))
|
||||||
|
|
||||||
|
@ -32,12 +36,12 @@ def assert_embedding_result(output):
|
||||||
"openai.resources.embeddings.Embeddings.create",
|
"openai.resources.embeddings.Embeddings.create",
|
||||||
side_effect=lambda *args, **kwargs: openai_embedding,
|
side_effect=lambda *args, **kwargs: openai_embedding,
|
||||||
)
|
)
|
||||||
def test_lcazureopenai_embeddings_raw(openai_embedding_call):
|
def test_azureopenai_embeddings_raw(openai_embedding_call):
|
||||||
model = LCAzureOpenAIEmbeddings(
|
model = AzureOpenAIEmbeddings(
|
||||||
model="text-embedding-ada-002",
|
azure_deployment="embedding-deployment",
|
||||||
deployment="embedding-deployment",
|
|
||||||
azure_endpoint="https://test.openai.azure.com/",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
openai_api_key="some-key",
|
api_key="some-key",
|
||||||
|
api_version="version",
|
||||||
)
|
)
|
||||||
output = model("Hello world")
|
output = model("Hello world")
|
||||||
assert_embedding_result(output)
|
assert_embedding_result(output)
|
||||||
|
@ -49,29 +53,13 @@ def test_lcazureopenai_embeddings_raw(openai_embedding_call):
|
||||||
side_effect=lambda *args, **kwargs: openai_embedding_batch,
|
side_effect=lambda *args, **kwargs: openai_embedding_batch,
|
||||||
)
|
)
|
||||||
def test_lcazureopenai_embeddings_batch_raw(openai_embedding_call):
|
def test_lcazureopenai_embeddings_batch_raw(openai_embedding_call):
|
||||||
model = LCAzureOpenAIEmbeddings(
|
|
||||||
model="text-embedding-ada-002",
|
|
||||||
deployment="embedding-deployment",
|
|
||||||
azure_endpoint="https://test.openai.azure.com/",
|
|
||||||
openai_api_key="some-key",
|
|
||||||
)
|
|
||||||
output = model(["Hello world", "Goodbye world"])
|
|
||||||
assert_embedding_result(output)
|
|
||||||
openai_embedding_call.assert_called()
|
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
|
||||||
"openai.resources.embeddings.Embeddings.create",
|
|
||||||
side_effect=lambda *args, **kwargs: openai_embedding,
|
|
||||||
)
|
|
||||||
def test_azureopenai_embeddings_raw(openai_embedding_call):
|
|
||||||
model = AzureOpenAIEmbeddings(
|
model = AzureOpenAIEmbeddings(
|
||||||
|
azure_deployment="embedding-deployment",
|
||||||
azure_endpoint="https://test.openai.azure.com/",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
api_key="some-key",
|
api_key="some-key",
|
||||||
api_version="version",
|
api_version="version",
|
||||||
azure_deployment="text-embedding-ada-002",
|
|
||||||
)
|
)
|
||||||
output = model("Hello world")
|
output = model(["Hello world", "Goodbye world"])
|
||||||
assert_embedding_result(output)
|
assert_embedding_result(output)
|
||||||
openai_embedding_call.assert_called()
|
openai_embedding_call.assert_called()
|
||||||
|
|
||||||
|
@ -82,10 +70,10 @@ def test_azureopenai_embeddings_raw(openai_embedding_call):
|
||||||
)
|
)
|
||||||
def test_azureopenai_embeddings_batch_raw(openai_embedding_call):
|
def test_azureopenai_embeddings_batch_raw(openai_embedding_call):
|
||||||
model = AzureOpenAIEmbeddings(
|
model = AzureOpenAIEmbeddings(
|
||||||
|
azure_deployment="text-embedding-ada-002",
|
||||||
azure_endpoint="https://test.openai.azure.com/",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
api_key="some-key",
|
api_key="some-key",
|
||||||
api_version="version",
|
api_version="version",
|
||||||
azure_deployment="text-embedding-ada-002",
|
|
||||||
)
|
)
|
||||||
output = model(["Hello world", "Goodbye world"])
|
output = model(["Hello world", "Goodbye world"])
|
||||||
assert_embedding_result(output)
|
assert_embedding_result(output)
|
||||||
|
@ -120,6 +108,7 @@ def test_openai_embeddings_batch_raw(openai_embedding_call):
|
||||||
openai_embedding_call.assert_called()
|
openai_embedding_call.assert_called()
|
||||||
|
|
||||||
|
|
||||||
|
@skip_when_sentence_bert_not_installed
|
||||||
@patch(
|
@patch(
|
||||||
"sentence_transformers.SentenceTransformer",
|
"sentence_transformers.SentenceTransformer",
|
||||||
side_effect=lambda *args, **kwargs: None,
|
side_effect=lambda *args, **kwargs: None,
|
||||||
|
@ -149,7 +138,9 @@ def test_lchuggingface_embeddings(
|
||||||
)
|
)
|
||||||
def test_lccohere_embeddings(langchain_cohere_embedding_call):
|
def test_lccohere_embeddings(langchain_cohere_embedding_call):
|
||||||
model = LCCohereEmbeddings(
|
model = LCCohereEmbeddings(
|
||||||
model="embed-english-light-v2.0", cohere_api_key="my-api-key"
|
model="embed-english-light-v2.0",
|
||||||
|
cohere_api_key="my-api-key",
|
||||||
|
user_agent="test",
|
||||||
)
|
)
|
||||||
|
|
||||||
output = model("Hello World")
|
output = model("Hello World")
|
||||||
|
@ -157,6 +148,7 @@ def test_lccohere_embeddings(langchain_cohere_embedding_call):
|
||||||
langchain_cohere_embedding_call.assert_called()
|
langchain_cohere_embedding_call.assert_called()
|
||||||
|
|
||||||
|
|
||||||
|
@skip_when_fastembed_not_installed
|
||||||
def test_fastembed_embeddings():
|
def test_fastembed_embeddings():
|
||||||
model = FastEmbedEmbeddings()
|
model = FastEmbedEmbeddings()
|
||||||
output = model("Hello World")
|
output = model("Hello World")
|
||||||
|
|
|
@ -1,32 +1,31 @@
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
import pytest
|
from openai.types.create_embedding_response import CreateEmbeddingResponse
|
||||||
from openai.resources.embeddings import Embeddings
|
|
||||||
|
|
||||||
from kotaemon.base import Document
|
from kotaemon.base import Document
|
||||||
from kotaemon.embeddings import LCAzureOpenAIEmbeddings
|
from kotaemon.embeddings import AzureOpenAIEmbeddings
|
||||||
from kotaemon.indices import VectorIndexing, VectorRetrieval
|
from kotaemon.indices import VectorIndexing, VectorRetrieval
|
||||||
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
|
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
|
||||||
|
|
||||||
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
||||||
openai_embedding = json.load(f)
|
openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@patch(
|
||||||
def mock_openai_embedding(monkeypatch):
|
"openai.resources.embeddings.Embeddings.create",
|
||||||
monkeypatch.setattr(Embeddings, "create", lambda *args, **kwargs: openai_embedding)
|
side_effect=lambda *args, **kwargs: openai_embedding,
|
||||||
|
)
|
||||||
|
def test_indexing(tmp_path):
|
||||||
def test_indexing(mock_openai_embedding, tmp_path):
|
|
||||||
db = ChromaVectorStore(path=str(tmp_path))
|
db = ChromaVectorStore(path=str(tmp_path))
|
||||||
doc_store = InMemoryDocumentStore()
|
doc_store = InMemoryDocumentStore()
|
||||||
embedding = LCAzureOpenAIEmbeddings(
|
embedding = AzureOpenAIEmbeddings(
|
||||||
model="text-embedding-ada-002",
|
azure_deployment="text-embedding-ada-002",
|
||||||
deployment="embedding-deployment",
|
|
||||||
azure_endpoint="https://test.openai.azure.com/",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
openai_api_key="some-key",
|
api_key="some-key",
|
||||||
|
api_version="version",
|
||||||
)
|
)
|
||||||
|
|
||||||
pipeline = VectorIndexing(vector_store=db, embedding=embedding, doc_store=doc_store)
|
pipeline = VectorIndexing(vector_store=db, embedding=embedding, doc_store=doc_store)
|
||||||
|
@ -39,14 +38,18 @@ def test_indexing(mock_openai_embedding, tmp_path):
|
||||||
assert len(pipeline.doc_store._store) == 1, "Expected 1 document"
|
assert len(pipeline.doc_store._store) == 1, "Expected 1 document"
|
||||||
|
|
||||||
|
|
||||||
def test_retrieving(mock_openai_embedding, tmp_path):
|
@patch(
|
||||||
|
"openai.resources.embeddings.Embeddings.create",
|
||||||
|
side_effect=lambda *args, **kwargs: openai_embedding,
|
||||||
|
)
|
||||||
|
def test_retrieving(tmp_path):
|
||||||
db = ChromaVectorStore(path=str(tmp_path))
|
db = ChromaVectorStore(path=str(tmp_path))
|
||||||
doc_store = InMemoryDocumentStore()
|
doc_store = InMemoryDocumentStore()
|
||||||
embedding = LCAzureOpenAIEmbeddings(
|
embedding = AzureOpenAIEmbeddings(
|
||||||
model="text-embedding-ada-002",
|
azure_deployment="text-embedding-ada-002",
|
||||||
deployment="embedding-deployment",
|
|
||||||
azure_endpoint="https://test.openai.azure.com/",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
openai_api_key="some-key",
|
api_key="some-key",
|
||||||
|
api_version="version",
|
||||||
)
|
)
|
||||||
|
|
||||||
index_pipeline = VectorIndexing(
|
index_pipeline = VectorIndexing(
|
||||||
|
|
|
@ -4,15 +4,17 @@ from unittest.mock import patch
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from kotaemon.base.schema import AIMessage, HumanMessage, LLMInterface, SystemMessage
|
from kotaemon.base.schema import AIMessage, HumanMessage, LLMInterface, SystemMessage
|
||||||
from kotaemon.llms import LCAzureChatOpenAI, LlamaCppChat
|
from kotaemon.llms import AzureChatOpenAI, LlamaCppChat
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from langchain_openai import AzureChatOpenAI as AzureChatOpenAILC
|
pass
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from langchain.chat_models import AzureChatOpenAI as AzureChatOpenAILC
|
pass
|
||||||
|
|
||||||
from openai.types.chat.chat_completion import ChatCompletion
|
from openai.types.chat.chat_completion import ChatCompletion
|
||||||
|
|
||||||
|
from .conftest import skip_llama_cpp_not_installed
|
||||||
|
|
||||||
_openai_chat_completion_response = ChatCompletion.parse_obj(
|
_openai_chat_completion_response = ChatCompletion.parse_obj(
|
||||||
{
|
{
|
||||||
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
||||||
|
@ -43,17 +45,12 @@ _openai_chat_completion_response = ChatCompletion.parse_obj(
|
||||||
side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
|
side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
|
||||||
)
|
)
|
||||||
def test_azureopenai_model(openai_completion):
|
def test_azureopenai_model(openai_completion):
|
||||||
model = LCAzureChatOpenAI(
|
model = AzureChatOpenAI(
|
||||||
|
api_key="dummy",
|
||||||
|
api_version="2024-05-01-preview",
|
||||||
|
azure_deployment="gpt-4o",
|
||||||
azure_endpoint="https://test.openai.azure.com/",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
openai_api_key="some-key",
|
|
||||||
openai_api_version="2023-03-15-preview",
|
|
||||||
deployment_name="gpt35turbo",
|
|
||||||
temperature=0,
|
|
||||||
)
|
)
|
||||||
assert isinstance(
|
|
||||||
model.to_langchain_format(), AzureChatOpenAILC
|
|
||||||
), "Agent not wrapped in Langchain's AzureChatOpenAI"
|
|
||||||
|
|
||||||
# test for str input - stream mode
|
# test for str input - stream mode
|
||||||
output = model("hello world")
|
output = model("hello world")
|
||||||
assert isinstance(
|
assert isinstance(
|
||||||
|
@ -76,6 +73,7 @@ def test_azureopenai_model(openai_completion):
|
||||||
openai_completion.assert_called()
|
openai_completion.assert_called()
|
||||||
|
|
||||||
|
|
||||||
|
@skip_llama_cpp_not_installed
|
||||||
def test_llamacpp_chat():
|
def test_llamacpp_chat():
|
||||||
from llama_cpp import Llama
|
from llama_cpp import Llama
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,8 @@ except ImportError:
|
||||||
|
|
||||||
from openai.types.completion import Completion
|
from openai.types.completion import Completion
|
||||||
|
|
||||||
|
from .conftest import skip_llama_cpp_not_installed, skip_openai_lc_wrapper_test
|
||||||
|
|
||||||
_openai_completion_response = Completion.parse_obj(
|
_openai_completion_response = Completion.parse_obj(
|
||||||
{
|
{
|
||||||
"id": "cmpl-7qyNoIo6gRSCJR0hi8o3ZKBH4RkJ0",
|
"id": "cmpl-7qyNoIo6gRSCJR0hi8o3ZKBH4RkJ0",
|
||||||
|
@ -33,6 +35,7 @@ _openai_completion_response = Completion.parse_obj(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_openai_lc_wrapper_test
|
||||||
@patch(
|
@patch(
|
||||||
"openai.resources.completions.Completions.create",
|
"openai.resources.completions.Completions.create",
|
||||||
side_effect=lambda *args, **kwargs: _openai_completion_response,
|
side_effect=lambda *args, **kwargs: _openai_completion_response,
|
||||||
|
@ -79,6 +82,7 @@ def test_openai_model(openai_completion):
|
||||||
), "Output for single text is not LLMInterface"
|
), "Output for single text is not LLMInterface"
|
||||||
|
|
||||||
|
|
||||||
|
@skip_llama_cpp_not_installed
|
||||||
def test_llamacpp_model():
|
def test_llamacpp_model():
|
||||||
weight_path = Path(__file__).parent / "resources" / "ggml-vocab-llama.gguf"
|
weight_path = Path(__file__).parent / "resources" / "ggml-vocab-llama.gguf"
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,8 @@ from kotaemon.loaders import (
|
||||||
UnstructuredReader,
|
UnstructuredReader,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from .conftest import skip_when_unstructured_not_installed
|
||||||
|
|
||||||
|
|
||||||
def test_docx_reader():
|
def test_docx_reader():
|
||||||
reader = DocxReader()
|
reader = DocxReader()
|
||||||
|
@ -52,6 +54,7 @@ def test_pdf_reader():
|
||||||
assert len(nodes) > 0
|
assert len(nodes) > 0
|
||||||
|
|
||||||
|
|
||||||
|
@skip_when_unstructured_not_installed
|
||||||
def test_unstructured_pdf_reader():
|
def test_unstructured_pdf_reader():
|
||||||
reader = UnstructuredReader()
|
reader = UnstructuredReader()
|
||||||
dirpath = Path(__file__).parent
|
dirpath = Path(__file__).parent
|
||||||
|
|
|
@ -5,7 +5,7 @@ from openai.types.chat.chat_completion import ChatCompletion
|
||||||
|
|
||||||
from kotaemon.base import Document
|
from kotaemon.base import Document
|
||||||
from kotaemon.indices.rankings import LLMReranking
|
from kotaemon.indices.rankings import LLMReranking
|
||||||
from kotaemon.llms import LCAzureChatOpenAI
|
from kotaemon.llms import AzureChatOpenAI
|
||||||
|
|
||||||
_openai_chat_completion_responses = [
|
_openai_chat_completion_responses = [
|
||||||
ChatCompletion.parse_obj(
|
ChatCompletion.parse_obj(
|
||||||
|
@ -41,11 +41,11 @@ _openai_chat_completion_responses = [
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def llm():
|
def llm():
|
||||||
return LCAzureChatOpenAI(
|
return AzureChatOpenAI(
|
||||||
azure_endpoint="https://dummy.openai.azure.com/",
|
api_key="dummy",
|
||||||
openai_api_key="dummy",
|
api_version="2024-05-01-preview",
|
||||||
openai_api_version="2023-03-15-preview",
|
azure_deployment="gpt-4o",
|
||||||
temperature=0,
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,8 @@ import pytest
|
||||||
|
|
||||||
from kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader
|
from kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader
|
||||||
|
|
||||||
|
from .conftest import skip_when_unstructured_not_installed
|
||||||
|
|
||||||
input_file = Path(__file__).parent / "resources" / "table.pdf"
|
input_file = Path(__file__).parent / "resources" / "table.pdf"
|
||||||
input_file_excel = Path(__file__).parent / "resources" / "dummy.xlsx"
|
input_file_excel = Path(__file__).parent / "resources" / "dummy.xlsx"
|
||||||
|
|
||||||
|
@ -26,6 +28,7 @@ def mathpix_output():
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
@skip_when_unstructured_not_installed
|
||||||
def test_ocr_reader(fullocr_output):
|
def test_ocr_reader(fullocr_output):
|
||||||
reader = OCRReader()
|
reader = OCRReader()
|
||||||
documents = reader.load_data(input_file, response_content=fullocr_output)
|
documents = reader.load_data(input_file, response_content=fullocr_output)
|
||||||
|
|
|
@ -1,22 +1,17 @@
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
import pytest
|
from openai.types.create_embedding_response import CreateEmbeddingResponse
|
||||||
from openai.resources.embeddings import Embeddings
|
|
||||||
|
|
||||||
from kotaemon.agents.tools import ComponentTool, GoogleSearchTool, WikipediaTool
|
from kotaemon.agents.tools import ComponentTool, GoogleSearchTool, WikipediaTool
|
||||||
from kotaemon.base import Document
|
from kotaemon.base import Document
|
||||||
from kotaemon.embeddings import LCAzureOpenAIEmbeddings
|
from kotaemon.embeddings import AzureOpenAIEmbeddings
|
||||||
from kotaemon.indices.vectorindex import VectorIndexing, VectorRetrieval
|
from kotaemon.indices.vectorindex import VectorIndexing, VectorRetrieval
|
||||||
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
|
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
|
||||||
|
|
||||||
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
||||||
openai_embedding = json.load(f)
|
openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
|
||||||
def mock_openai_embedding(monkeypatch):
|
|
||||||
monkeypatch.setattr(Embeddings, "create", lambda *args, **kwargs: openai_embedding)
|
|
||||||
|
|
||||||
|
|
||||||
def test_google_tool(mock_google_search):
|
def test_google_tool(mock_google_search):
|
||||||
|
@ -35,14 +30,18 @@ def test_wikipedia_tool():
|
||||||
assert output
|
assert output
|
||||||
|
|
||||||
|
|
||||||
def test_pipeline_tool(mock_openai_embedding, tmp_path):
|
@patch(
|
||||||
|
"openai.resources.embeddings.Embeddings.create",
|
||||||
|
side_effect=lambda *args, **kwargs: openai_embedding,
|
||||||
|
)
|
||||||
|
def test_pipeline_tool(tmp_path):
|
||||||
db = ChromaVectorStore(path=str(tmp_path))
|
db = ChromaVectorStore(path=str(tmp_path))
|
||||||
doc_store = InMemoryDocumentStore()
|
doc_store = InMemoryDocumentStore()
|
||||||
embedding = LCAzureOpenAIEmbeddings(
|
embedding = AzureOpenAIEmbeddings(
|
||||||
model="text-embedding-ada-002",
|
azure_deployment="embedding-deployment",
|
||||||
deployment="embedding-deployment",
|
|
||||||
azure_endpoint="https://test.openai.azure.com/",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
openai_api_key="some-key",
|
api_key="some-key",
|
||||||
|
api_version="version",
|
||||||
)
|
)
|
||||||
|
|
||||||
index_pipeline = VectorIndexing(
|
index_pipeline = VectorIndexing(
|
||||||
|
|
|
@ -7,7 +7,7 @@ from index import ReaderIndexingPipeline
|
||||||
from openai.resources.embeddings import Embeddings
|
from openai.resources.embeddings import Embeddings
|
||||||
from openai.types.chat.chat_completion import ChatCompletion
|
from openai.types.chat.chat_completion import ChatCompletion
|
||||||
|
|
||||||
from kotaemon.llms import LCAzureChatOpenAI
|
from kotaemon.llms import AzureChatOpenAI
|
||||||
|
|
||||||
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
||||||
openai_embedding = json.load(f)
|
openai_embedding = json.load(f)
|
||||||
|
@ -61,12 +61,11 @@ def test_ingest_pipeline(patch, mock_openai_embedding, tmp_path):
|
||||||
assert len(results) == 1
|
assert len(results) == 1
|
||||||
|
|
||||||
# create llm
|
# create llm
|
||||||
llm = LCAzureChatOpenAI(
|
llm = AzureChatOpenAI(
|
||||||
openai_api_base="https://test.openai.azure.com/",
|
api_key="dummy",
|
||||||
openai_api_key="some-key",
|
api_version="2024-05-01-preview",
|
||||||
openai_api_version="2023-03-15-preview",
|
azure_deployment="gpt-4o",
|
||||||
deployment_name="gpt35turbo",
|
azure_endpoint="https://test.openai.azure.com/",
|
||||||
temperature=0,
|
|
||||||
)
|
)
|
||||||
qa_pipeline = indexing_pipeline.to_qa_pipeline(llm=llm, openai_api_key="some-key")
|
qa_pipeline = indexing_pipeline.to_qa_pipeline(llm=llm, openai_api_key="some-key")
|
||||||
response = qa_pipeline("Summarize this document.")
|
response = qa_pipeline("Summarize this document.")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user