Move LLM-related components into LLM module (#74)

* Move splitter into indexing module
* Rename post_processing module to parsers
* Migrate LLM-specific composite pipelines into llms module

This change moves the `splitters` module into the `indexing` module. The `indexing` module will be created soon to house `indexing`-related components.

This change renames the `post_processing` module to `parsers`. "Post-processing" is a generic term that conveys very little information. In the future, we will add other extractors to the `parsers` module, such as a metadata extractor.

This change migrates the composite elements into the `llms` module. These elements heavily assume that their internal nodes are LLM-specific. As a result, migrating them into the `llms` module will make them more discoverable and simplify the code base structure.
This commit is contained in:
Nguyen Trung Duc (john) 2023-11-15 16:26:53 +07:00 committed by GitHub
parent 9945afdf6f
commit f8b8d86d4e
13 changed files with 41 additions and 35 deletions

View File

@ -1,9 +0,0 @@
from .branching import GatedBranchingPipeline, SimpleBranchingPipeline
from .linear import GatedLinearPipeline, SimpleLinearPipeline
__all__ = [
"SimpleLinearPipeline",
"GatedLinearPipeline",
"SimpleBranchingPipeline",
"GatedBranchingPipeline",
]

View File

@ -1,8 +1,9 @@
from langchain.schema.messages import AIMessage, SystemMessage
from .chats import AzureChatOpenAI, ChatLLM
from .chats.base import BaseMessage, HumanMessage
from .branching import GatedBranchingPipeline, SimpleBranchingPipeline
from .chats import AzureChatOpenAI, BaseMessage, ChatLLM, HumanMessage
from .completions import LLM, AzureOpenAI, OpenAI
from .linear import GatedLinearPipeline, SimpleLinearPipeline
from .prompts import BasePromptComponent, PromptTemplate
__all__ = [
@ -20,4 +21,9 @@ __all__ = [
# prompt-specific components
"BasePromptComponent",
"PromptTemplate",
# strategies
"SimpleLinearPipeline",
"GatedLinearPipeline",
"SimpleBranchingPipeline",
"GatedBranchingPipeline",
]

View File

@ -2,8 +2,8 @@ from typing import List, Optional
from theflow import Param
from kotaemon.base import BaseComponent, Document
from kotaemon.composite.linear import GatedLinearPipeline
from ..base import BaseComponent, Document
from .linear import GatedLinearPipeline
class SimpleBranchingPipeline(BaseComponent):
@ -14,10 +14,12 @@ class SimpleBranchingPipeline(BaseComponent):
branches (List[BaseComponent]): The list of branches to be executed.
Example Usage:
from kotaemon.composite import GatedLinearPipeline
from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.post_processing.extractor import RegexExtractor
from kotaemon.llms import BasePromptComponent
from kotaemon.llms import (
AzureChatOpenAI,
BasePromptComponent,
GatedLinearPipeline,
)
from kotaemon.parsers import RegexExtractor
def identity(x):
return x
@ -87,10 +89,12 @@ class GatedBranchingPipeline(SimpleBranchingPipeline):
branches (List[BaseComponent]): The list of branches to be executed.
Example Usage:
from kotaemon.composite import GatedLinearPipeline
from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.post_processing.extractor import RegexExtractor
from kotaemon.llms import BasePromptComponent
from kotaemon.llms import (
AzureChatOpenAI,
BasePromptComponent,
GatedLinearPipeline,
)
from kotaemon.parsers import RegexExtractor
def identity(x):
return x
@ -152,7 +156,7 @@ if __name__ == "__main__":
from kotaemon.llms import BasePromptComponent
from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.post_processing.extractor import RegexExtractor
from kotaemon.parsers import RegexExtractor
def identity(x):
return x

View File

@ -1,4 +1,4 @@
from .base import ChatLLM
from .base import BaseMessage, ChatLLM, HumanMessage
from .openai import AzureChatOpenAI
__all__ = ["ChatLLM", "AzureChatOpenAI"]
__all__ = ["ChatLLM", "AzureChatOpenAI", "BaseMessage", "HumanMessage"]

View File

@ -1,8 +1,10 @@
from typing import Any, Callable, Optional, Union
from kotaemon.base import BaseComponent
from kotaemon.base.schema import Document, IO_Type
from kotaemon.llms import LLM, BasePromptComponent, ChatLLM
from ..base import BaseComponent
from ..base.schema import Document, IO_Type
from .chats import ChatLLM
from .completions import LLM
from .prompts import BasePromptComponent
class SimpleLinearPipeline(BaseComponent):
@ -86,8 +88,8 @@ class GatedLinearPipeline(SimpleLinearPipeline):
Example Usage:
from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.post_processing.extractor import RegexExtractor
from kotaemon.llms import BasePromptComponent
from kotaemon.parsers import RegexExtractor
def identity(x):
return x

View File

@ -0,0 +1,3 @@
from .regex_extractor import FirstMatchRegexExtractor, RegexExtractor
__all__ = ["RegexExtractor", "FirstMatchRegexExtractor"]

View File

@ -9,6 +9,7 @@ from theflow.utils.modules import ObjectInitDeclaration as _
from kotaemon.base import BaseComponent
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.indexing.splitters import SimpleNodeParser
from kotaemon.loaders import (
AutoReader,
DirectoryReader,
@ -16,7 +17,6 @@ from kotaemon.loaders import (
OCRReader,
PandasExcelReader,
)
from kotaemon.parsers.splitter import SimpleNodeParser
from kotaemon.pipelines.agents import BaseAgent
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.reranking import BaseRerankingPipeline

View File

@ -3,15 +3,15 @@ from copy import deepcopy
import pytest
from openai.types.chat.chat_completion import ChatCompletion
from kotaemon.composite import (
from kotaemon.llms import (
AzureChatOpenAI,
BasePromptComponent,
GatedBranchingPipeline,
GatedLinearPipeline,
SimpleBranchingPipeline,
SimpleLinearPipeline,
)
from kotaemon.llms import BasePromptComponent
from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.post_processing.extractor import RegexExtractor
from kotaemon.parsers import RegexExtractor
_openai_chat_completion_response = ChatCompletion.parse_obj(
{

View File

@ -1,7 +1,7 @@
import pytest
from kotaemon.base import Document
from kotaemon.post_processing.extractor import RegexExtractor
from kotaemon.parsers import RegexExtractor
@pytest.fixture

View File

@ -2,7 +2,7 @@ import pytest
from kotaemon.base import Document
from kotaemon.llms import BasePromptComponent, PromptTemplate
from kotaemon.post_processing.extractor import RegexExtractor
from kotaemon.parsers import RegexExtractor
def test_set_attributes():