Refactor the index component and update the MVP insurance accordingly (#90)

Refactor the `kotaemon/pipelines` module into `kotaemon/indices` and create the VectorIndex. Note: `qa` is currently placed inside `kotaemon/indices`, since at the moment the only `qa` we have is in RAG. At the same time, I think `qa` could be an independent module at `kotaemon/qa`. Since this can be changed later, I am going with the first option for now and will revisit whether it should be moved.
2023-11-30 18:35:07 +07:00
parent 8e3a1d193f
commit e34b1e4c6d
25 changed files with 396 additions and 605 deletions
--- a/knowledgehub/indices/base.py
+++ b/knowledgehub/indices/base.py
@@ -5,7 +5,7 @@ from typing import Any, Type

 from llama_index.node_parser.interface import NodeParser

-from ..base import BaseComponent, Document
+from kotaemon.base import BaseComponent, Document, RetrievedDocument


 class DocTransformer(BaseComponent):
@@ -26,7 +26,7 @@ class DocTransformer(BaseComponent):
        ...


-class LlamaIndexMixin:
+class LlamaIndexDocTransformerMixin:
    """Allow automatically wrapping a Llama-index component into kotaemon component

    Example:
@@ -70,3 +70,23 @@ class LlamaIndexMixin:
        """
        docs = self._obj(documents, **kwargs)  # type: ignore
        return [Document.from_dict(doc.to_dict()) for doc in docs]
+
+
+class BaseIndexing(BaseComponent):
+    """Define the base interface for an indexing pipeline."""
+
+    def to_retrieval_pipeline(self, **kwargs):
+        """Convert to a retrieval pipeline; this base version always raises."""
+        raise NotImplementedError  # no conversion is provided at this base level
+
+    def to_qa_pipeline(self, **kwargs):
+        """Convert to a QA pipeline; this base version always raises."""
+        raise NotImplementedError  # no conversion is provided at this base level
+
+
+class BaseRetrieval(BaseComponent):
+    """Define the base interface for a retrieval pipeline."""
+
+    @abstractmethod
+    def run(self, *args, **kwargs) -> list[RetrievedDocument]:
+        ...  # abstract placeholder; annotated to return list[RetrievedDocument]