Separate rerankers, splitters and extractors (#85)

This commit is contained in:
Nguyen Trung Duc (john)
2023-11-27 14:25:54 +07:00
committed by GitHub
parent 0dede9c82d
commit 2186c5558f
15 changed files with 211 additions and 135 deletions

View File

@@ -0,0 +1,7 @@
from .doc_parsers import BaseDocParser, SummaryExtractor, TitleExtractor
# Public names of this package: exactly the classes imported from
# ``.doc_parsers`` above.
__all__ = [
"BaseDocParser",
"TitleExtractor",
"SummaryExtractor",
]

View File

@@ -0,0 +1,19 @@
from ..base import DocTransformer, LlamaIndexMixin
class BaseDocParser(DocTransformer):
    """Common base class for the document parsers defined in this module.

    Adds no attributes or methods of its own on top of ``DocTransformer``.
    """
class TitleExtractor(LlamaIndexMixin, BaseDocParser):
    """Document parser tied to llama_index's ``TitleExtractor``."""

    # NOTE(review): presumably ``LlamaIndexMixin`` calls this hook to find the
    # class it wraps -- confirm against ``..base``.
    def _get_li_class(self):
        """Return the ``TitleExtractor`` class from ``llama_index.extractors``."""
        # Imported inside the method so llama_index is only loaded on use.
        from llama_index.extractors import TitleExtractor as LITitleExtractor

        return LITitleExtractor
class SummaryExtractor(LlamaIndexMixin, BaseDocParser):
    """Document parser tied to llama_index's ``SummaryExtractor``."""

    # NOTE(review): presumably ``LlamaIndexMixin`` calls this hook to find the
    # class it wraps -- confirm against ``..base``.
    def _get_li_class(self):
        """Return the ``SummaryExtractor`` class from ``llama_index.extractors``."""
        # Imported inside the method so llama_index is only loaded on use.
        from llama_index.extractors import SummaryExtractor as LISummaryExtractor

        return LISummaryExtractor