Migrate the MVP into kotaemon (#108)
- Migrate the MVP into kotaemon. - Preliminarily include the pipeline within the chatbot interface. - Organize the MVP as an application. Todo: - Add an info panel to view the planning of agents -> Fix streaming agents' output. Resolve: #60 Resolve: #61 Resolve: #62
This commit is contained in:
committed by
GitHub
parent
230328c62f
commit
5a9d6f75be
@@ -16,6 +16,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
||||
elasticsearch_url: str = "http://localhost:9200",
|
||||
k1: float = 2.0,
|
||||
b: float = 0.75,
|
||||
**kwargs,
|
||||
):
|
||||
try:
|
||||
from elasticsearch import Elasticsearch
|
||||
@@ -31,7 +32,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
||||
self.b = b
|
||||
|
||||
# Create an Elasticsearch client instance
|
||||
self.client = Elasticsearch(elasticsearch_url)
|
||||
self.client = Elasticsearch(elasticsearch_url, **kwargs)
|
||||
self.es_bulk = bulk
|
||||
# Define the index settings and mappings
|
||||
settings = {
|
||||
@@ -63,19 +64,16 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
||||
self,
|
||||
docs: Union[Document, List[Document]],
|
||||
ids: Optional[Union[List[str], str]] = None,
|
||||
**kwargs
|
||||
refresh_indices: bool = True,
|
||||
**kwargs,
|
||||
):
|
||||
"""Add document into document store
|
||||
|
||||
Args:
|
||||
docs: list of documents to add
|
||||
ids: specify the ids of documents to add or
|
||||
use existing doc.doc_id
|
||||
refresh_indices: request Elasticsearch to update
|
||||
its index (default to True)
|
||||
ids: specify the ids of documents to add or use existing doc.doc_id
|
||||
refresh_indices: request Elasticsearch to update its index (default to True)
|
||||
"""
|
||||
refresh_indices = kwargs.pop("refresh_indices", True)
|
||||
|
||||
if ids and not isinstance(ids, list):
|
||||
ids = [ids]
|
||||
if not isinstance(docs, list):
|
||||
@@ -120,7 +118,9 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
||||
)
|
||||
return docs
|
||||
|
||||
def query(self, query: str, top_k: int = 10) -> List[Document]:
|
||||
def query(
|
||||
self, query: str, top_k: int = 10, doc_ids: Optional[list] = None
|
||||
) -> List[Document]:
|
||||
"""Search Elasticsearch docstore using search query (BM25)
|
||||
|
||||
Args:
|
||||
@@ -131,7 +131,9 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
||||
Returns:
|
||||
List[Document]: List of result documents
|
||||
"""
|
||||
query_dict = {"query": {"match": {"content": query}}, "size": top_k}
|
||||
query_dict: dict = {"query": {"match": {"content": query}}, "size": top_k}
|
||||
if doc_ids:
|
||||
query_dict["query"]["match"]["_id"] = {"values": doc_ids}
|
||||
return self.query_raw(query_dict)
|
||||
|
||||
def get(self, ids: Union[List[str], str]) -> List[Document]:
|
||||
|
@@ -74,6 +74,11 @@ class InMemoryDocumentStore(BaseDocumentStore):
|
||||
"""Load document store from path"""
|
||||
with open(path) as f:
|
||||
store = json.load(f)
|
||||
# TODO: save and load aren't lossless. A Document-subclass will lose
|
||||
# information. Need to edit the `to_dict` and `from_dict` methods in
|
||||
# the Document class.
|
||||
# For better query support, utilize SQLite as the default document store.
|
||||
# Also, for portability, use SQLAlchemy for document store.
|
||||
self._store = {key: Document.from_dict(value) for key, value in store.items()}
|
||||
|
||||
def __persist_flow__(self):
|
||||
|
@@ -15,6 +15,18 @@ class SimpleFileDocumentStore(InMemoryDocumentStore):
|
||||
if path is not None and Path(path).is_file():
|
||||
self.load(path)
|
||||
|
||||
def get(self, ids: Union[List[str], str]) -> List[Document]:
    """Get documents by id, reloading the store from disk if an id is missing.

    Args:
        ids: a single document id or a list of document ids.

    Returns:
        The stored documents, in the same order as ``ids``.

    Raises:
        KeyError: if an id is still not in the store after the reload attempt.
    """
    if not isinstance(ids, list):
        ids = [ids]

    # The in-memory copy can lag behind the backing file, so refresh it once
    # when any requested id is unknown.
    if any(doc_id not in self._store for doc_id in ids):
        path = self._path
        # Same guard as the constructor: only reload when a backing file
        # actually exists; otherwise open() would fail on a None or missing
        # path before the real "unknown id" KeyError could surface.
        if path is not None and Path(path).is_file():
            self.load(path)

    return [self._store[doc_id] for doc_id in ids]
|
||||
|
||||
def add(
|
||||
self,
|
||||
docs: Union[Document, List[Document]],
|
||||
|
Reference in New Issue
Block a user