Enforce all IO objects to be subclassed from Document (#88)

* enforce Document as IO

* Separate rerankers, splitters and extractors (#85)

* partially refactor importing

* add text to embedding outputs

---------

Co-authored-by: Nguyen Trung Duc (john) <trungduc1992@gmail.com>
This commit is contained in:
ian_Cin
2023-11-27 16:35:09 +07:00
committed by GitHub
parent 2186c5558f
commit 8e0779a22d
13 changed files with 108 additions and 59 deletions

View File

@@ -56,7 +56,7 @@ def test_cot_plus_operator(openai_completion):
)
thought = thought1 + thought2
output = thought(word="hello", language="French")
assert output == {
assert output.content == {
"word": "hello",
"language": "French",
"translated": "Bonjour",
@@ -86,7 +86,7 @@ def test_cot_manual(openai_completion):
)
thought = ManualSequentialChainOfThought(thoughts=[thought1, thought2], llm=llm)
output = thought(word="hello", language="French")
assert output == {
assert output.content == {
"word": "hello",
"language": "French",
"translated": "Bonjour",
@@ -120,7 +120,7 @@ def test_cot_with_termination_callback(openai_completion):
terminate=lambda d: True if d.get("translated", "") == "Bonjour" else False,
)
output = thought(word="hallo", language="French")
assert output == {
assert output.content == {
"word": "hallo",
"language": "French",
"translated": "Bonjour",

View File

@@ -2,6 +2,7 @@ import json
from pathlib import Path
from unittest.mock import patch
from kotaemon.base import Document
from kotaemon.embeddings.cohere import CohereEmbdeddings
from kotaemon.embeddings.huggingface import HuggingFaceEmbeddings
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
@@ -26,8 +27,9 @@ def test_azureopenai_embeddings_raw(openai_embedding_call):
)
output = model("Hello world")
assert isinstance(output, list)
assert isinstance(output[0], list)
assert isinstance(output[0][0], float)
assert isinstance(output[0], Document)
assert isinstance(output[0].embedding, list)
assert isinstance(output[0].embedding[0], float)
openai_embedding_call.assert_called()
@@ -44,8 +46,9 @@ def test_azureopenai_embeddings_batch_raw(openai_embedding_call):
)
output = model(["Hello world", "Goodbye world"])
assert isinstance(output, list)
assert isinstance(output[0], list)
assert isinstance(output[0][0], float)
assert isinstance(output[0], Document)
assert isinstance(output[0].embedding, list)
assert isinstance(output[0].embedding[0], float)
openai_embedding_call.assert_called()
@@ -68,8 +71,9 @@ def test_huggingface_embddings(
output = model("Hello World")
assert isinstance(output, list)
assert isinstance(output[0], list)
assert isinstance(output[0][0], float)
assert isinstance(output[0], Document)
assert isinstance(output[0].embedding, list)
assert isinstance(output[0].embedding[0], float)
sentence_transformers_init.assert_called()
langchain_huggingface_embedding_call.assert_called()
@@ -85,6 +89,7 @@ def test_cohere_embeddings(langchain_cohere_embedding_call):
output = model("Hello World")
assert isinstance(output, list)
assert isinstance(output[0], list)
assert isinstance(output[0][0], float)
assert isinstance(output[0], Document)
assert isinstance(output[0].embedding, list)
assert isinstance(output[0].embedding[0], float)
langchain_cohere_embedding_call.assert_called()