[AUR-391, AUR-393] Add Document and DocumentReader base (#6)

* Declare BaseComponent

* Brainstorming base class for LLM call

* Define base LLM

* Add tests

* Clean telemetry environment for accurate testing

* Fix README

* Fix typing

* add base document reader

* update test

* update requirements

* Cosmetic change

* update requirements

* reformat

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
This commit is contained in:
Tuan Anh Nguyen Dang (Tadashi_Cin)
2023-08-31 11:24:12 +07:00
committed by GitHub
parent 4211315a54
commit 21350153d4
6 changed files with 82 additions and 6 deletions

View File

@@ -0,0 +1,22 @@
from haystack.schema import Document as HaystackDocument
from llama_index.schema import Document as BaseDocument
# Text content of the sample document built by Document.example().
SAMPLE_TEXT = "A sample Document from kotaemon"
class Document(BaseDocument):
    """Base document class, mostly inherited from the llama-index Document.

    On top of the inherited behaviour it offers a sample-instance
    constructor and a converter to the Haystack document type.
    """

    @classmethod
    def example(cls) -> "Document":
        """Return a sample document built from ``SAMPLE_TEXT``.

        The instance is created through ``cls`` rather than a hard-coded
        ``Document`` so that calling ``example()`` on a subclass yields an
        instance of that subclass.
        """
        return cls(
            text=SAMPLE_TEXT,
            metadata={"filename": "README.md", "category": "codebase"},
        )

    def to_haystack_format(self) -> HaystackDocument:
        """Convert this document to the Haystack document format.

        Returns:
            A Haystack document whose ``content`` is this document's text
            and whose ``meta`` is a shallow copy of this document's metadata
            (an empty dict when the metadata is empty or unset).
        """
        # Shallow-copy so the two documents do not share one mutable dict;
        # otherwise editing the metadata of one would silently change the other.
        metadata = dict(self.metadata) if self.metadata else {}
        return HaystackDocument(content=self.text, meta=metadata)