Refactor the index component and update the MVP insurance accordingly (#90)
Refactor the `kotaemon/pipelines` module into `kotaemon/indices` and create the VectorIndex. Note: for now I have placed `qa` inside `kotaemon/indices`, since at the moment `qa` is the only thing we have in RAG. At the same time, I think `qa` could be an independent module at `kotaemon/qa`. Since this can be changed later, I am going with the first option for now and will see whether it should be moved later.
This commit is contained in:
committed by
GitHub
parent
8e3a1d193f
commit
e34b1e4c6d
@@ -7,8 +7,7 @@ from openai.resources.embeddings import Embeddings
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
|
||||
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
|
||||
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
|
||||
from kotaemon.indices import VectorIndexing, VectorRetrieval
|
||||
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
|
||||
|
||||
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
||||
@@ -30,9 +29,7 @@ def test_indexing(mock_openai_embedding, tmp_path):
|
||||
openai_api_key="some-key",
|
||||
)
|
||||
|
||||
pipeline = IndexVectorStoreFromDocumentPipeline(
|
||||
vector_store=db, embedding=embedding, doc_store=doc_store
|
||||
)
|
||||
pipeline = VectorIndexing(vector_store=db, embedding=embedding, doc_store=doc_store)
|
||||
pipeline.doc_store = cast(InMemoryDocumentStore, pipeline.doc_store)
|
||||
pipeline.vector_store = cast(ChromaVectorStore, pipeline.vector_store)
|
||||
assert pipeline.vector_store._collection.count() == 0, "Expected empty collection"
|
||||
@@ -52,10 +49,10 @@ def test_retrieving(mock_openai_embedding, tmp_path):
|
||||
openai_api_key="some-key",
|
||||
)
|
||||
|
||||
index_pipeline = IndexVectorStoreFromDocumentPipeline(
|
||||
index_pipeline = VectorIndexing(
|
||||
vector_store=db, embedding=embedding, doc_store=doc_store
|
||||
)
|
||||
retrieval_pipeline = RetrieveDocumentFromVectorStorePipeline(
|
||||
retrieval_pipeline = VectorRetrieval(
|
||||
vector_store=db, doc_store=doc_store, embedding=embedding
|
||||
)
|
||||
|
||||
|
Reference in New Issue
Block a user