Enable fastembed as a local embedding vendor (#12)

* Prefix all LangChain-based embeddings with LC

* Provide vanilla OpenAI embeddings

* Add test for AzureOpenAIEmbeddings and OpenAIEmbeddings

* Incorporate fastembed

---------

Co-authored-by: ian_Cin <ian@cinnamon.is>
This commit is contained in:
Duc Nguyen (john)
2024-04-09 01:44:34 +07:00
committed by GitHub
parent 8001c86b16
commit e75354d410
14 changed files with 406 additions and 59 deletions

View File

@@ -3,7 +3,7 @@ from typing import List
from kotaemon.base import BaseComponent, Document, LLMInterface, Node, Param, lazy
from kotaemon.contribs.promptui.logs import ResultLog
-from kotaemon.embeddings import AzureOpenAIEmbeddings
+from kotaemon.embeddings import LCAzureOpenAIEmbeddings
from kotaemon.indices import VectorIndexing, VectorRetrieval
from kotaemon.llms import LCAzureChatOpenAI
from kotaemon.storages import ChromaVectorStore, SimpleFileDocumentStore
@@ -47,7 +47,7 @@ class QuestionAnsweringPipeline(BaseComponent):
VectorRetrieval.withx(
vector_store=lazy(ChromaVectorStore).withx(path="./tmp"),
doc_store=lazy(SimpleFileDocumentStore).withx(path="docstore.json"),
-embedding=AzureOpenAIEmbeddings.withx(
+embedding=LCAzureOpenAIEmbeddings.withx(
model="text-embedding-ada-002",
deployment="dummy-q2-text-embedding",
azure_endpoint="https://bleh-dummy-2.openai.azure.com/",
@@ -82,7 +82,7 @@ class IndexingPipeline(VectorIndexing):
lazy(SimpleFileDocumentStore).withx(path="docstore.json"),
ignore_ui=True,
)
-embedding: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings.withx(
+embedding: LCAzureOpenAIEmbeddings = LCAzureOpenAIEmbeddings.withx(
model="text-embedding-ada-002",
deployment="dummy-q2-text-embedding",
azure_endpoint="https://bleh-dummy-2.openai.azure.com/",