[AUR-395] Adopt Example1 disclaimer pipeline (#42)
* Adopt Example1 disclaimer pipeline * Update Document class * Add composite components * Modify Extractor behaviours
This commit is contained in:
@@ -1,12 +1,43 @@
|
||||
from typing import Any, Optional
|
||||
|
||||
from haystack.schema import Document as HaystackDocument
|
||||
from llama_index.bridge.pydantic import Field
|
||||
from llama_index.schema import Document as BaseDocument
|
||||
from pyparsing import TypeVar
|
||||
|
||||
# Type variable constrained to exactly two types: a `Document` (given as a
# forward reference, since the class is defined further down) or a plain `str`.
IO_Type = TypeVar("IO_Type", "Document", str)
|
||||
# Placeholder text for a sample document.
# NOTE(review): presumably consumed by `Document.example()`; its body is not
# visible here -- confirm.
SAMPLE_TEXT = "A sample Document from kotaemon"
|
||||
|
||||
|
||||
class Document(BaseDocument):
|
||||
"""Base document class, mostly inherited from Document class from llama-index"""
|
||||
"""
|
||||
Base document class, mostly inherited from Document class from llama-index.
|
||||
|
||||
This class accepts one positional argument `content` of an arbitrary type, which will
|
||||
store the raw content of the document. If specified, the class will use
|
||||
`content` to initialize the base llama_index class.
|
||||
"""
|
||||
|
||||
content: Any
|
||||
|
||||
def __init__(self, content: Optional[Any] = None, *args, **kwargs):
    """Build a document from raw content, another Document, or keywords.

    Args:
        content: Raw payload of the document.
            * ``None``: ``content`` is taken from the ``text`` keyword if it
              is not ``None``, otherwise from the ``embedding`` keyword if it
              is not ``None``; if neither is given, ``kwargs`` is left as is.
            * a ``Document``: its fields (``content.dict()``) seed the new
              instance; explicitly passed keyword arguments override them.
            * anything else: stored as ``content``; ``text`` becomes
              ``str(content)`` when ``content`` is truthy, else ``""``.
        *args: Forwarded unchanged to the base class initialiser.
        **kwargs: Field values forwarded to the base class initialiser.
    """
    if content is None:
        # No positional payload: derive `content` from `text`, then `embedding`.
        if kwargs.get("text") is not None:
            kwargs["content"] = kwargs["text"]
        elif kwargs.get("embedding") is not None:
            kwargs["content"] = kwargs["embedding"]
    elif isinstance(content, Document):
        # Copy the source document's fields, but keep the caller's keyword
        # arguments: previously they were silently dropped in this branch.
        kwargs = {**content.dict(), **kwargs}
    else:
        kwargs["content"] = content
        # Falsy payloads (e.g. "", 0, []) get an empty text, not their str().
        kwargs["text"] = str(content) if content else ""
    super().__init__(*args, **kwargs)
|
||||
|
||||
def __bool__(self):
    """Report a document as truthy exactly when its ``content`` is truthy."""
    if self.content:
        return True
    return False
|
||||
|
||||
@classmethod
|
||||
def example(cls) -> "Document":
|
||||
@@ -23,7 +54,7 @@ class Document(BaseDocument):
|
||||
return HaystackDocument(content=text, meta=metadata)
|
||||
|
||||
def __str__(self):
    """Return the string form of the document's raw ``content``."""
    # Single return: the earlier `return self.text` preceded this statement
    # and made it unreachable.
    return str(self.content)
|
||||
|
||||
|
||||
class RetrievedDocument(Document):
|
||||
|
Reference in New Issue
Block a user