[AUR-391, AUR-393] Add Document and DocumentReader base (#6)
* Declare BaseComponent
* Brainstorming base class for LLM call
* Define base LLM
* Add tests
* Clean telemetry environment for accurate testing
* Fix README
* Fix typing
* Add base document reader
* Update test
* Update requirements
* Cosmetic change
* Update requirements
* Reformat

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
This commit is contained in:
committed by
GitHub
parent
4211315a54
commit
21350153d4
@@ -1,10 +1,26 @@
|
||||
class DocumentLoader:
    """Document loader.

    Stub class: it declares no attributes or methods of its own.
    """
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Type, Union
|
||||
|
||||
from llama_index import download_loader
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from ..documents.base import Document
|
||||
|
||||
|
||||
class TextManipulator:
    """Text manipulation.

    Stub class: it declares no attributes or methods of its own.
    """
|
||||
class AutoReader(BaseReader):
    """General auto reader for a variety of files. (based on llama-hub)

    Wraps another reader instance and re-packages the documents it returns
    into this package's ``Document`` class.
    """

    def __init__(self, reader_type: Union[str, Type[BaseReader]]) -> None:
        """Init reader using string identifier or class name from llama-hub.

        Args:
            reader_type: either a string identifier, which is resolved to a
                reader class through ``download_loader``, or a reader class
                itself. In both cases the class is instantiated with no
                arguments and kept as the wrapped reader.
        """
        # Cooperative initialisation of the base class.
        # NOTE(review): BaseReader's own initialiser is not visible from this
        # file; this assumes it takes no required arguments -- confirm.
        super().__init__()

        if isinstance(reader_type, str):
            reader_cls = download_loader(reader_type)
        else:
            reader_cls = reader_type
        self._reader = reader_cls()

    def load_data(self, file: Union[Path, str], **kwargs: Any) -> List[Document]:
        """Load ``file`` through the wrapped reader.

        Args:
            file: forwarded to the wrapped reader as the ``file`` keyword.
            **kwargs: forwarded unchanged to the wrapped reader's
                ``load_data``.

        Returns:
            One ``Document`` per document produced by the wrapped reader,
            each rebuilt with ``Document.from_dict`` from the source
            document's ``to_dict()`` output.
        """
        documents = self._reader.load_data(file=file, **kwargs)

        # convert Document to new base class from kotaemon
        return [Document.from_dict(doc.to_dict()) for doc in documents]


class DocumentManipulator:
    """Document manipulation"""
|
||||
|
Reference in New Issue
Block a user