Refactor the index component and update the MVP insurance accordingly (#90)

Refactor the `kotaemon/pipelines` module to `kotaemon/indices`. Create the VectorIndex.

Note: for now I have placed `qa` inside `kotaemon/indices`, since at the moment `qa` is only used in RAG. At the same time, I think `qa` could be an independent module in `kotaemon/qa`. Since this can be changed later, I am going with the first option for now and will observe whether it needs to change.
This commit is contained in:
Duc Nguyen (john)
2023-11-30 18:35:07 +07:00
committed by GitHub
parent 8e3a1d193f
commit e34b1e4c6d
25 changed files with 396 additions and 605 deletions

View File

@@ -5,7 +5,7 @@ from typing import Any, Type
from llama_index.node_parser.interface import NodeParser
from ..base import BaseComponent, Document
from kotaemon.base import BaseComponent, Document, RetrievedDocument
class DocTransformer(BaseComponent):
@@ -26,7 +26,7 @@ class DocTransformer(BaseComponent):
...
class LlamaIndexMixin:
class LlamaIndexDocTransformerMixin:
"""Allow automatically wrapping a Llama-index component into kotaemon component
Example:
@@ -70,3 +70,23 @@ class LlamaIndexMixin:
"""
docs = self._obj(documents, **kwargs) # type: ignore
return [Document.from_dict(doc.to_dict()) for doc in docs]
class BaseIndexing(BaseComponent):
    """Base interface for an indexing pipeline.

    Both conversion hooks below raise ``NotImplementedError`` in this base
    class, so a concrete indexing pipeline has to provide its own versions
    before they can be used.
    """

    def to_retrieval_pipeline(self, **kwargs):
        """Convert this indexing pipeline into a retrieval pipeline.

        Args:
            **kwargs: Options for the conversion; unused by this base
                implementation.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    def to_qa_pipeline(self, **kwargs):
        """Convert this indexing pipeline into a QA pipeline.

        Args:
            **kwargs: Options for the conversion; unused by this base
                implementation.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError
class BaseRetrieval(BaseComponent):
    """Base interface for a retrieval pipeline."""

    @abstractmethod
    def run(self, *args, **kwargs) -> list[RetrievedDocument]:
        """Execute the retrieval pipeline.

        Declared abstract; the body here is only a placeholder.

        Args:
            *args: Positional inputs, defined by the concrete pipeline.
            **kwargs: Keyword inputs, defined by the concrete pipeline.

        Returns:
            The retrieved documents.
        """
        ...