Move Document and other interface into base/schema (#69)
This commit is contained in:
parent
4704e2c11a
commit
8532138842
|
@ -1,3 +1,4 @@
|
||||||
from .component import BaseComponent
|
from .component import BaseComponent
|
||||||
|
from .schema import Document
|
||||||
|
|
||||||
__all__ = ["BaseComponent"]
|
__all__ = ["BaseComponent", "Document"]
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import TYPE_CHECKING, Any, Optional, TypeVar
|
from typing import TYPE_CHECKING, Any, Optional, TypeVar
|
||||||
|
|
||||||
from llama_index.bridge.pydantic import Field
|
from llama_index.bridge.pydantic import Field
|
||||||
|
@ -72,3 +74,19 @@ class RetrievedDocument(Document):
|
||||||
|
|
||||||
score: float = Field(default=0.0)
|
score: float = Field(default=0.0)
|
||||||
retrieval_metadata: dict = Field(default={})
|
retrieval_metadata: dict = Field(default={})
|
||||||
|
|
||||||
|
|
||||||
|
class LLMInterface(Document):
|
||||||
|
candidates: list[str] = Field(default_factory=list)
|
||||||
|
completion_tokens: int = -1
|
||||||
|
total_tokens: int = -1
|
||||||
|
prompt_tokens: int = -1
|
||||||
|
logits: list[list[float]] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractorOutput(Document):
|
||||||
|
"""
|
||||||
|
Represents the output of an extractor.
|
||||||
|
"""
|
||||||
|
|
||||||
|
matches: list[str]
|
|
@ -5,7 +5,7 @@ from langchain.schema.messages import AIMessage, SystemMessage
|
||||||
from theflow import Param, SessionCompose
|
from theflow import Param, SessionCompose
|
||||||
|
|
||||||
from ..base import BaseComponent
|
from ..base import BaseComponent
|
||||||
from ..llms.base import LLMInterface
|
from ..base.schema import LLMInterface
|
||||||
from ..llms.chats.base import BaseMessage, HumanMessage
|
from ..llms.chats.base import BaseMessage, HumanMessage
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,9 +2,8 @@ from typing import List, Optional
|
||||||
|
|
||||||
from theflow import Param
|
from theflow import Param
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
from kotaemon.base import BaseComponent, Document
|
||||||
from kotaemon.composite.linear import GatedLinearPipeline
|
from kotaemon.composite.linear import GatedLinearPipeline
|
||||||
from kotaemon.documents.base import Document
|
|
||||||
|
|
||||||
|
|
||||||
class SimpleBranchingPipeline(BaseComponent):
|
class SimpleBranchingPipeline(BaseComponent):
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from typing import Any, Callable, Optional, Union
|
from typing import Any, Callable, Optional, Union
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
from kotaemon.base import BaseComponent
|
||||||
from kotaemon.documents.base import Document, IO_Type
|
from kotaemon.base.schema import Document, IO_Type
|
||||||
from kotaemon.llms.chats.base import ChatLLM
|
from kotaemon.llms.chats.base import ChatLLM
|
||||||
from kotaemon.llms.completions.base import LLM
|
from kotaemon.llms.completions.base import LLM
|
||||||
from kotaemon.prompt.base import BasePromptComponent
|
from kotaemon.prompt.base import BasePromptComponent
|
||||||
|
|
|
@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, Union
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
from ..documents.base import Document
|
from ..base import Document
|
||||||
|
|
||||||
|
|
||||||
class BaseDocumentStore(ABC):
|
class BaseDocumentStore(ABC):
|
||||||
|
|
|
@ -2,7 +2,7 @@ import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, Union
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
from ..documents.base import Document
|
from ..base import Document
|
||||||
from .base import BaseDocumentStore
|
from .base import BaseDocumentStore
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
from .base import Document
|
|
||||||
|
|
||||||
__all__ = ["Document"]
|
|
|
@ -6,8 +6,7 @@ from typing import Type
|
||||||
from langchain.schema.embeddings import Embeddings as LCEmbeddings
|
from langchain.schema.embeddings import Embeddings as LCEmbeddings
|
||||||
from theflow import Param
|
from theflow import Param
|
||||||
|
|
||||||
from ..base import BaseComponent
|
from ..base import BaseComponent, Document
|
||||||
from ..documents.base import Document
|
|
||||||
|
|
||||||
|
|
||||||
class BaseEmbeddings(BaseComponent):
|
class BaseEmbeddings(BaseComponent):
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
from typing import List
|
|
||||||
|
|
||||||
from pydantic import Field
|
|
||||||
|
|
||||||
from kotaemon.documents.base import Document
|
|
||||||
|
|
||||||
|
|
||||||
class LLMInterface(Document):
|
|
||||||
candidates: List[str] = Field(default_factory=list)
|
|
||||||
completion_tokens: int = -1
|
|
||||||
total_tokens: int = -1
|
|
||||||
prompt_tokens: int = -1
|
|
||||||
logits: List[List[float]] = Field(default_factory=list)
|
|
|
@ -8,7 +8,7 @@ from langchain.schema.messages import BaseMessage, HumanMessage
|
||||||
from theflow.base import Param
|
from theflow.base import Param
|
||||||
|
|
||||||
from ...base import BaseComponent
|
from ...base import BaseComponent
|
||||||
from ..base import LLMInterface
|
from ...base.schema import LLMInterface
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@ from langchain.llms.base import BaseLLM
|
||||||
from theflow.base import Param
|
from theflow.base import Param
|
||||||
|
|
||||||
from ...base import BaseComponent
|
from ...base import BaseComponent
|
||||||
from ..base import LLMInterface
|
from ...base.schema import LLMInterface
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
|
@ -4,8 +4,7 @@ from typing import Any, List, Type, Union
|
||||||
from llama_index import SimpleDirectoryReader, download_loader
|
from llama_index import SimpleDirectoryReader, download_loader
|
||||||
from llama_index.readers.base import BaseReader
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
from ..base import BaseComponent
|
from ..base import BaseComponent, Document
|
||||||
from ..documents.base import Document
|
|
||||||
|
|
||||||
|
|
||||||
class AutoReader(BaseComponent):
|
class AutoReader(BaseComponent):
|
||||||
|
|
|
@ -8,7 +8,7 @@ from typing import Any, List, Optional, Union
|
||||||
|
|
||||||
from llama_index.readers.base import BaseReader
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
from kotaemon.documents import Document
|
from kotaemon.base import Document
|
||||||
|
|
||||||
|
|
||||||
class PandasExcelReader(BaseReader):
|
class PandasExcelReader(BaseReader):
|
||||||
|
|
|
@ -8,7 +8,7 @@ import requests
|
||||||
from langchain.utils import get_from_dict_or_env
|
from langchain.utils import get_from_dict_or_env
|
||||||
from llama_index.readers.base import BaseReader
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
from kotaemon.documents import Document
|
from kotaemon.base import Document
|
||||||
|
|
||||||
from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown
|
from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@ from uuid import uuid4
|
||||||
import requests
|
import requests
|
||||||
from llama_index.readers.base import BaseReader
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
from kotaemon.documents import Document
|
from kotaemon.base import Document
|
||||||
|
|
||||||
from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured
|
from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured
|
||||||
from .utils.table import strip_special_chars_markdown
|
from .utils.table import strip_special_chars_markdown
|
||||||
|
|
|
@ -4,9 +4,7 @@ from llama_index.node_parser import SimpleNodeParser as LISimpleNodeParser
|
||||||
from llama_index.node_parser.interface import NodeParser
|
from llama_index.node_parser.interface import NodeParser
|
||||||
from llama_index.text_splitter import TokenTextSplitter
|
from llama_index.text_splitter import TokenTextSplitter
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
from ..base import BaseComponent, Document
|
||||||
|
|
||||||
from ..documents.base import Document
|
|
||||||
|
|
||||||
__all__ = ["TokenTextSplitter"]
|
__all__ = ["TokenTextSplitter"]
|
||||||
|
|
||||||
|
|
|
@ -5,9 +5,8 @@ from pathlib import Path
|
||||||
|
|
||||||
from theflow import Node, Param
|
from theflow import Node, Param
|
||||||
|
|
||||||
from ..base import BaseComponent
|
from ..base import BaseComponent, Document
|
||||||
from ..docstores import BaseDocumentStore
|
from ..docstores import BaseDocumentStore
|
||||||
from ..documents.base import Document
|
|
||||||
from ..embeddings import BaseEmbeddings
|
from ..embeddings import BaseEmbeddings
|
||||||
from ..vectorstores import BaseVectorStore
|
from ..vectorstores import BaseVectorStore
|
||||||
|
|
||||||
|
|
|
@ -6,8 +6,8 @@ from theflow import Node
|
||||||
from theflow.utils.modules import ObjectInitDeclaration as _
|
from theflow.utils.modules import ObjectInitDeclaration as _
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
from kotaemon.base import BaseComponent
|
||||||
|
from kotaemon.base.schema import RetrievedDocument
|
||||||
from kotaemon.docstores import InMemoryDocumentStore
|
from kotaemon.docstores import InMemoryDocumentStore
|
||||||
from kotaemon.documents.base import RetrievedDocument
|
|
||||||
from kotaemon.embeddings import AzureOpenAIEmbeddings
|
from kotaemon.embeddings import AzureOpenAIEmbeddings
|
||||||
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
||||||
from kotaemon.pipelines.agents import BaseAgent
|
from kotaemon.pipelines.agents import BaseAgent
|
||||||
|
|
|
@ -5,8 +5,8 @@ from pathlib import Path
|
||||||
from theflow import Node, Param
|
from theflow import Node, Param
|
||||||
|
|
||||||
from ..base import BaseComponent
|
from ..base import BaseComponent
|
||||||
|
from ..base.schema import Document, RetrievedDocument
|
||||||
from ..docstores import BaseDocumentStore
|
from ..docstores import BaseDocumentStore
|
||||||
from ..documents.base import Document, RetrievedDocument
|
|
||||||
from ..embeddings import BaseEmbeddings
|
from ..embeddings import BaseEmbeddings
|
||||||
from ..vectorstores import BaseVectorStore
|
from ..vectorstores import BaseVectorStore
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ from typing import Any, AnyStr, Optional, Type, Union
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from kotaemon.documents.base import Document
|
from kotaemon.base import Document
|
||||||
|
|
||||||
from .base import BaseTool
|
from .base import BaseTool
|
||||||
|
|
||||||
|
|
|
@ -5,16 +5,8 @@ from typing import Callable
|
||||||
|
|
||||||
from theflow import Param
|
from theflow import Param
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
from kotaemon.base import BaseComponent, Document
|
||||||
from kotaemon.documents.base import Document
|
from kotaemon.base.schema import ExtractorOutput
|
||||||
|
|
||||||
|
|
||||||
class ExtractorOutput(Document):
|
|
||||||
"""
|
|
||||||
Represents the output of an extractor.
|
|
||||||
"""
|
|
||||||
|
|
||||||
matches: list[str]
|
|
||||||
|
|
||||||
|
|
||||||
class RegexExtractor(BaseComponent):
|
class RegexExtractor(BaseComponent):
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
from typing import Callable, Union
|
from typing import Callable, Union
|
||||||
|
|
||||||
from kotaemon.base import BaseComponent
|
from kotaemon.base import BaseComponent, Document
|
||||||
from kotaemon.documents.base import Document
|
|
||||||
from kotaemon.prompt.template import PromptTemplate
|
from kotaemon.prompt.template import PromptTemplate
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ from llama_index.vector_stores.types import BasePydanticVectorStore
|
||||||
from llama_index.vector_stores.types import VectorStore as LIVectorStore
|
from llama_index.vector_stores.types import VectorStore as LIVectorStore
|
||||||
from llama_index.vector_stores.types import VectorStoreQuery
|
from llama_index.vector_stores.types import VectorStoreQuery
|
||||||
|
|
||||||
from ..documents.base import Document
|
from ..base import Document
|
||||||
|
|
||||||
|
|
||||||
class BaseVectorStore(ABC):
|
class BaseVectorStore(ABC):
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from kotaemon.base import Document
|
||||||
from kotaemon.docstores import InMemoryDocumentStore
|
from kotaemon.docstores import InMemoryDocumentStore
|
||||||
from kotaemon.documents.base import Document
|
|
||||||
|
|
||||||
|
|
||||||
def test_simple_document_store_base_interfaces(tmp_path):
|
def test_simple_document_store_base_interfaces(tmp_path):
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from kotaemon.documents.base import Document, RetrievedDocument
|
from kotaemon.base.schema import Document, RetrievedDocument
|
||||||
|
|
||||||
from .conftest import skip_when_haystack_not_installed
|
from .conftest import skip_when_haystack_not_installed
|
||||||
|
|
||||||
|
|
|
@ -5,8 +5,8 @@ from typing import cast
|
||||||
import pytest
|
import pytest
|
||||||
from openai.resources.embeddings import Embeddings
|
from openai.resources.embeddings import Embeddings
|
||||||
|
|
||||||
|
from kotaemon.base import Document
|
||||||
from kotaemon.docstores import InMemoryDocumentStore
|
from kotaemon.docstores import InMemoryDocumentStore
|
||||||
from kotaemon.documents.base import Document
|
|
||||||
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
|
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
|
||||||
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
|
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
|
||||||
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
|
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
|
||||||
|
|
|
@ -4,7 +4,7 @@ from langchain.chat_models import AzureChatOpenAI as AzureChatOpenAILC
|
||||||
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
|
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
|
||||||
from openai.types.chat.chat_completion import ChatCompletion
|
from openai.types.chat.chat_completion import ChatCompletion
|
||||||
|
|
||||||
from kotaemon.llms.base import LLMInterface
|
from kotaemon.base.schema import LLMInterface
|
||||||
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
||||||
|
|
||||||
_openai_chat_completion_response = ChatCompletion.parse_obj(
|
_openai_chat_completion_response = ChatCompletion.parse_obj(
|
||||||
|
|
|
@ -4,7 +4,7 @@ from langchain.llms import AzureOpenAI as AzureOpenAILC
|
||||||
from langchain.llms import OpenAI as OpenAILC
|
from langchain.llms import OpenAI as OpenAILC
|
||||||
from openai.types.completion import Completion
|
from openai.types.completion import Completion
|
||||||
|
|
||||||
from kotaemon.llms.base import LLMInterface
|
from kotaemon.base.schema import LLMInterface
|
||||||
from kotaemon.llms.completions.openai import AzureOpenAI, OpenAI
|
from kotaemon.llms.completions.openai import AzureOpenAI, OpenAI
|
||||||
|
|
||||||
_openai_completion_response = Completion.parse_obj(
|
_openai_completion_response = Completion.parse_obj(
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from kotaemon.documents.base import Document
|
from kotaemon.base import Document
|
||||||
from kotaemon.post_processing.extractor import RegexExtractor
|
from kotaemon.post_processing.extractor import RegexExtractor
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from kotaemon.documents.base import Document
|
from kotaemon.base import Document
|
||||||
from kotaemon.post_processing.extractor import RegexExtractor
|
from kotaemon.post_processing.extractor import RegexExtractor
|
||||||
from kotaemon.prompt.base import BasePromptComponent
|
from kotaemon.prompt.base import BasePromptComponent
|
||||||
from kotaemon.prompt.template import PromptTemplate
|
from kotaemon.prompt.template import PromptTemplate
|
||||||
|
|
|
@ -3,7 +3,7 @@ from pathlib import Path
|
||||||
from langchain.schema import Document as LangchainDocument
|
from langchain.schema import Document as LangchainDocument
|
||||||
from llama_index.node_parser import SimpleNodeParser
|
from llama_index.node_parser import SimpleNodeParser
|
||||||
|
|
||||||
from kotaemon.documents.base import Document
|
from kotaemon.base import Document
|
||||||
from kotaemon.loaders import AutoReader
|
from kotaemon.loaders import AutoReader
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,8 +4,8 @@ from pathlib import Path
|
||||||
import pytest
|
import pytest
|
||||||
from openai.resources.embeddings import Embeddings
|
from openai.resources.embeddings import Embeddings
|
||||||
|
|
||||||
|
from kotaemon.base import Document
|
||||||
from kotaemon.docstores import InMemoryDocumentStore
|
from kotaemon.docstores import InMemoryDocumentStore
|
||||||
from kotaemon.documents.base import Document
|
|
||||||
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
|
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
|
||||||
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
|
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
|
||||||
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
|
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from kotaemon.documents.base import Document
|
from kotaemon.base import Document
|
||||||
from kotaemon.vectorstores import ChromaVectorStore, InMemoryVectorStore
|
from kotaemon.vectorstores import ChromaVectorStore, InMemoryVectorStore
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user