Add file-based document store and vector store (#96)

* Modify docstore and vectorstore objects to be reconstructable
* Simplify the file docstore
* Use the simple file docstore and vector store in MVP
This commit is contained in:
Duc Nguyen (john)
2023-12-04 17:46:00 +07:00
committed by GitHub
parent 0ce3a8832f
commit 37c744b616
18 changed files with 324 additions and 149 deletions

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
import uuid
from pathlib import Path
from typing import Optional, Sequence, cast
from kotaemon.base import BaseComponent, Document, RetrievedDocument
@@ -68,37 +67,6 @@ class VectorIndexing(BaseIndexing):
if self.doc_store:
self.doc_store.add(input_)
def save(
    self,
    path: str | Path,
    vectorstore_fname: str = VECTOR_STORE_FNAME,
    docstore_fname: str = DOC_STORE_FNAME,
):
    """Write the vector store, and the doc store when one is set, to disk.

    Args:
        path (str | Path): base location; each store is saved to `path`
            joined with its file name
        vectorstore_fname (str): file name passed on for the vector store
        docstore_fname (str): file name passed on for the doc store
    """
    # Plain strings are promoted to Path so the `/` joins below work.
    base = Path(path) if isinstance(path, str) else path

    self.vector_store.save(base / vectorstore_fname)

    # The doc store is optional; nothing more to do when it is falsy.
    if not self.doc_store:
        return
    self.doc_store.save(base / docstore_fname)
def load(
    self,
    path: str | Path,
    vectorstore_fname: str = VECTOR_STORE_FNAME,
    docstore_fname: str = DOC_STORE_FNAME,
):
    """Read the vector store, and the doc store when one is set, from disk.

    Args:
        path (str | Path): base location; each store is loaded from `path`
            joined with its file name
        vectorstore_fname (str): file name passed on for the vector store
        docstore_fname (str): file name passed on for the doc store
    """
    # Plain strings are promoted to Path so the `/` joins below work.
    base = Path(path) if isinstance(path, str) else path

    self.vector_store.load(base / vectorstore_fname)

    # The doc store is optional; nothing more to do when it is falsy.
    if not self.doc_store:
        return
    self.doc_store.load(base / docstore_fname)
class VectorRetrieval(BaseRetrieval):
"""Retrieve list of documents from vector store"""
@@ -144,37 +112,6 @@ class VectorRetrieval(BaseRetrieval):
return result
def save(
    self,
    path: str | Path,
    vectorstore_fname: str = VECTOR_STORE_FNAME,
    docstore_fname: str = DOC_STORE_FNAME,
):
    """Write this pipeline's vector store, and its doc store if set, to disk.

    Args:
        path (str | Path): base location that both file names are joined onto
        vectorstore_fname (str): file name used for the vector store
        docstore_fname (str): file name used for the doc store
    """
    # Accept a plain string by turning it into a Path first.
    target_dir = Path(path) if isinstance(path, str) else path

    vector_target = target_dir / vectorstore_fname
    self.vector_store.save(vector_target)

    # Only persist the doc store when one is attached (truthy).
    if self.doc_store:
        doc_target = target_dir / docstore_fname
        self.doc_store.save(doc_target)
def load(
    self,
    path: str | Path,
    vectorstore_fname: str = VECTOR_STORE_FNAME,
    docstore_fname: str = DOC_STORE_FNAME,
):
    """Read this pipeline's vector store, and its doc store if set, from disk.

    Args:
        path (str | Path): base location that both file names are joined onto
        vectorstore_fname (str): file name used for the vector store
        docstore_fname (str): file name used for the doc store
    """
    # Accept a plain string by turning it into a Path first.
    source_dir = Path(path) if isinstance(path, str) else path

    vector_source = source_dir / vectorstore_fname
    self.vector_store.load(vector_source)

    # Only restore the doc store when one is attached (truthy).
    if self.doc_store:
        doc_source = source_dir / docstore_fname
        self.doc_store.load(doc_source)
class TextVectorQA(BaseComponent):
retrieving_pipeline: BaseRetrieval