Add file-based document store and vector store (#96)
* Modify docstore and vectorstore objects to be reconstructable * Simplify the file docstore * Use the simple file docstore and vector store in MVP
This commit is contained in:
committed by
GitHub
parent
0ce3a8832f
commit
37c744b616
44
knowledgehub/storages/docstores/simple_file.py
Normal file
44
knowledgehub/storages/docstores/simple_file.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from kotaemon.base import Document
|
||||
|
||||
from .in_memory import InMemoryDocumentStore
|
||||
|
||||
|
||||
class SimpleFileDocumentStore(InMemoryDocumentStore):
    """Improve InMemoryDocumentStore by auto saving whenever the corpus is changed

    The store is bound to a single path: when that path already points to a
    file at construction time it is passed to ``load``, and after every
    ``add`` / ``delete`` the store is passed to ``save`` with the same path.
    """

    def __init__(self, path: Optional[Union[str, Path]]):
        """Create the store and load existing content when available

        Args:
            path: location of the backing file. When it is ``None`` or does
                not point to an existing file, the store starts out with
                whatever ``InMemoryDocumentStore.__init__`` sets up.
        """
        super().__init__()
        self._path = path
        if path is not None and Path(path).is_file():
            self.load(path)

    def _autosave(self) -> None:
        """Write the store to ``self._path``, skipping when no path is set"""
        # __init__ tolerates ``path=None``; mirror that here so that a
        # path-less store does not hand ``None`` to ``save``.
        if self._path is not None:
            self.save(self._path)

    def add(
        self,
        docs: Union[Document, List[Document]],
        ids: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Add document into document store, then save the store

        Args:
            docs: list of documents to add
            ids: specify the ids of documents to add or
                use existing doc.doc_id
            **kwargs: forwarded unchanged to ``InMemoryDocumentStore.add``,
                e.g. ``exist_ok`` (presumably controls whether a duplicate
                doc-id raises an error; see the parent class for the exact
                semantics and default)
        """
        super().add(docs=docs, ids=ids, **kwargs)
        self._autosave()

    def delete(self, ids: Union[List[str], str]):
        """Delete document by id, then save the store"""
        super().delete(ids=ids)
        self._autosave()

    def __persist_flow__(self):
        """Return the arguments describing this store as a serializable dict

        Returns:
            a dict with the single key ``"path"`` whose value is the
            configured path passed through ``theflow``'s ``serialize``
            (presumably consumed by theflow to reconstruct the object;
            confirm against theflow's persistence protocol)
        """
        # Imported lazily, as in the original implementation.
        from theflow.utils.modules import serialize

        return {"path": serialize(self._path)}
|
Reference in New Issue
Block a user