Update Base interface of Index/Retrieval pipeline (#36)

* add base Tool

* minor update test_tool

* update test dependency

* update test dependency

* Fix namespace conflict

* update test

* add base Agent Interface, add ReWoo Agent

* minor update

* update test

* fix typo

* remove unneeded print

* update rewoo agent

* add LLMTool

* update BaseAgent type

* add ReAct agent

* add ReAct agent

* minor update

* minor update

* minor update

* minor update

* update base reader with BaseComponent

* add splitter

* update agent and tool

* update vectorstores

* update load/save for indexing and retrieving pipeline

* update test_agent for more use-cases

* add missing dependency for test

* update test case for in memory vectorstore

* add TextSplitter to BaseComponent

* update type hint basetool

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
This commit is contained in:
Tuan Anh Nguyen Dang (Tadashi_Cin)
2023-10-04 14:27:44 +07:00
committed by GitHub
parent 49ed3f6994
commit 56bc41b673
13 changed files with 302 additions and 36 deletions

View File

@@ -1,13 +1,14 @@
from pathlib import Path
from typing import Any, List, Type, Union
from llama_index import download_loader
from llama_index import SimpleDirectoryReader, download_loader
from llama_index.readers.base import BaseReader
from ..base import BaseComponent
from ..documents.base import Document
class AutoReader(BaseReader):
class AutoReader(BaseComponent, BaseReader):
"""General auto reader for a variety of files. (based on llama-hub)"""
def __init__(self, reader_type: Union[str, Type[BaseReader]]) -> None:
@@ -17,6 +18,7 @@ class AutoReader(BaseReader):
self._reader = download_loader(reader_type)()
else:
self._reader = reader_type()
super().__init__()
def load_data(self, file: Union[Path, str], **kwargs: Any) -> List[Document]:
documents = self._reader.load_data(file=file, **kwargs)
@@ -24,3 +26,42 @@ class AutoReader(BaseReader):
# convert Document to new base class from kotaemon
converted_documents = [Document.from_dict(doc.to_dict()) for doc in documents]
return converted_documents
def run(self, file: Union[Path, str], **kwargs: Any) -> List[Document]:
return self.load_data(file=file, **kwargs)
class LIBaseReader(BaseComponent, BaseReader):
_reader_class: Type[BaseReader]
def __init__(self, *args, **kwargs):
if self._reader_class is None:
raise AttributeError(
"Require `_reader_class` to set a BaseReader class from LlamarIndex"
)
self._reader = self._reader_class(*args, **kwargs)
super().__init__()
def __setattr__(self, name: str, value: Any) -> None:
if name.startswith("_"):
return super().__setattr__(name, value)
return setattr(self._reader, name, value)
def __getattr__(self, name: str) -> Any:
return getattr(self._reader, name)
def load_data(self, *args, **kwargs: Any) -> List[Document]:
documents = self._reader.load_data(*args, **kwargs)
# convert Document to new base class from kotaemon
converted_documents = [Document.from_dict(doc.to_dict()) for doc in documents]
return converted_documents
def run(self, *args, **kwargs: Any) -> List[Document]:
return self.load_data(*args, **kwargs)
class DirectoryReader(LIBaseReader):
_reader_class = SimpleDirectoryReader