[AUR-391, AUR-393] Add Document and DocumentReader base (#6)
* Declare BaseComponent
* Brainstorm base class for LLM call
* Define base LLM
* Add tests
* Clean telemetry environment for accurate testing
* Fix README
* Fix typing
* Add base document reader
* Update test
* Update requirements
* Cosmetic change
* Update requirements
* Reformat

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
This commit is contained in:
committed by
GitHub
parent
4211315a54
commit
21350153d4
22
knowledgehub/documents/base.py
Normal file
22
knowledgehub/documents/base.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from haystack.schema import Document as HaystackDocument
|
||||
from llama_index.schema import Document as BaseDocument
|
||||
|
||||
# Text used as the body of the sample document built by ``Document.example()``.
SAMPLE_TEXT = "A sample Document from kotaemon"
|
||||
|
||||
|
||||
class Document(BaseDocument):
    """Base document class, mostly inherited from the llama-index Document class.

    On top of the inherited behavior it provides a sample-document
    constructor and a converter to the Haystack document format.
    """

    @classmethod
    def example(cls) -> "Document":
        """Build a small sample document.

        Returns:
            An instance of ``cls`` whose text is ``SAMPLE_TEXT`` and whose
            metadata holds a sample ``filename`` and ``category``.
        """
        # Instantiate through ``cls`` (not the hard-coded base class) so that
        # subclasses calling ``example()`` get an instance of their own type.
        return cls(
            text=SAMPLE_TEXT,
            metadata={"filename": "README.md", "category": "codebase"},
        )

    def to_haystack_format(self) -> HaystackDocument:
        """Convert this document to the Haystack document format.

        Returns:
            A Haystack document carrying this document's text as ``content``
            and its metadata as ``meta``.
        """
        # Fall back to an empty dict when the document has no metadata.
        metadata = self.metadata or {}
        text = self.text
        return HaystackDocument(content=text, meta=metadata)
|
Reference in New Issue
Block a user