- Migrate the MVP into kotaemon. - Preliminarily include the pipeline within the chatbot interface. - Organize the MVP as an application. Todo: - Add an info panel to view the agents' planning -> Fix streaming of the agents' output. Resolve: #60 Resolve: #61 Resolve: #62
50 lines
1.3 KiB
Python
50 lines
1.3 KiB
Python
from ..base import DocTransformer, LlamaIndexDocTransformerMixin
|
|
|
|
|
|
class BaseSplitter(DocTransformer):
    """Common parent type shared by the document splitters in this module.

    Adds no behaviour of its own on top of ``DocTransformer``; it exists so
    that splitters can be recognised through a single base class.
    """
|
|
|
|
|
|
class TokenSplitter(LlamaIndexDocTransformerMixin, BaseSplitter):
    """Splitter backed by llama-index's ``TokenTextSplitter``.

    The constructor only forwards its arguments to the parent initialiser;
    the class returned by ``_get_li_class`` is presumably what the mixin
    instantiates with them (confirm against ``LlamaIndexDocTransformerMixin``).
    """

    def __init__(
        self,
        chunk_size: int = 1024,
        chunk_overlap: int = 20,
        separator: str = " ",
        **params,
    ):
        """Collect the splitter settings and hand them to the parent class.

        Args:
            chunk_size: forwarded unchanged as ``chunk_size`` (default 1024).
            chunk_overlap: forwarded unchanged as ``chunk_overlap``
                (default 20).
            separator: forwarded unchanged as ``separator`` (default: a
                single space).
            **params: any further keyword arguments, passed through as-is.
        """
        # The named arguments can never also appear in ``params``, so merging
        # them into one mapping is equivalent to passing them individually.
        splitter_options = {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
            "separator": separator,
            **params,
        }
        super().__init__(**splitter_options)

    def _get_li_class(self):
        """Return the llama-index class this splitter wraps."""
        # Imported inside the method so that llama_index is only loaded when
        # this method is actually called.
        from llama_index.text_splitter import TokenTextSplitter as li_class

        return li_class
|
|
|
|
|
|
class SentenceWindowSplitter(LlamaIndexDocTransformerMixin, BaseSplitter):
    """Splitter backed by llama-index's ``SentenceWindowNodeParser``.

    The constructor only forwards its arguments to the parent initialiser;
    the class returned by ``_get_li_class`` is presumably what the mixin
    instantiates with them (confirm against ``LlamaIndexDocTransformerMixin``).
    """

    def __init__(
        self,
        window_size: int = 3,
        window_metadata_key: str = "window",
        original_text_metadata_key: str = "original_text",
        **params,
    ):
        """Collect the parser settings and hand them to the parent class.

        Args:
            window_size: forwarded unchanged as ``window_size`` (default 3).
            window_metadata_key: forwarded unchanged as
                ``window_metadata_key`` (default ``"window"``).
            original_text_metadata_key: forwarded unchanged as
                ``original_text_metadata_key`` (default ``"original_text"``).
            **params: any further keyword arguments, passed through as-is.
        """
        # The named arguments can never also appear in ``params``, so merging
        # them into one mapping is equivalent to passing them individually.
        parser_options = {
            "window_size": window_size,
            "window_metadata_key": window_metadata_key,
            "original_text_metadata_key": original_text_metadata_key,
            **params,
        }
        super().__init__(**parser_options)

    def _get_li_class(self):
        """Return the llama-index class this splitter wraps."""
        # Imported inside the method so that llama_index is only loaded when
        # this method is actually called.
        from llama_index.node_parser import SentenceWindowNodeParser as li_class

        return li_class
|