Move Document and other interfaces into base/schema (#69)
This commit is contained in:
parent
4704e2c11a
commit
8532138842
|
@ -1,3 +1,4 @@
|
|||
from .component import BaseComponent
|
||||
from .schema import Document
|
||||
|
||||
__all__ = ["BaseComponent"]
|
||||
__all__ = ["BaseComponent", "Document"]
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Optional, TypeVar
|
||||
|
||||
from llama_index.bridge.pydantic import Field
|
||||
|
@ -72,3 +74,19 @@ class RetrievedDocument(Document):
|
|||
|
||||
score: float = Field(default=0.0)
|
||||
retrieval_metadata: dict = Field(default={})
|
||||
|
||||
|
||||
class LLMInterface(Document):
|
||||
candidates: list[str] = Field(default_factory=list)
|
||||
completion_tokens: int = -1
|
||||
total_tokens: int = -1
|
||||
prompt_tokens: int = -1
|
||||
logits: list[list[float]] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ExtractorOutput(Document):
|
||||
"""
|
||||
Represents the output of an extractor.
|
||||
"""
|
||||
|
||||
matches: list[str]
|
|
@ -5,7 +5,7 @@ from langchain.schema.messages import AIMessage, SystemMessage
|
|||
from theflow import Param, SessionCompose
|
||||
|
||||
from ..base import BaseComponent
|
||||
from ..llms.base import LLMInterface
|
||||
from ..base.schema import LLMInterface
|
||||
from ..llms.chats.base import BaseMessage, HumanMessage
|
||||
|
||||
|
||||
|
|
|
@ -2,9 +2,8 @@ from typing import List, Optional
|
|||
|
||||
from theflow import Param
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from kotaemon.base import BaseComponent, Document
|
||||
from kotaemon.composite.linear import GatedLinearPipeline
|
||||
from kotaemon.documents.base import Document
|
||||
|
||||
|
||||
class SimpleBranchingPipeline(BaseComponent):
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from kotaemon.documents.base import Document, IO_Type
|
||||
from kotaemon.base.schema import Document, IO_Type
|
||||
from kotaemon.llms.chats.base import ChatLLM
|
||||
from kotaemon.llms.completions.base import LLM
|
||||
from kotaemon.prompt.base import BasePromptComponent
|
||||
|
|
|
@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
|
|||
from pathlib import Path
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from ..documents.base import Document
|
||||
from ..base import Document
|
||||
|
||||
|
||||
class BaseDocumentStore(ABC):
|
||||
|
|
|
@ -2,7 +2,7 @@ import json
|
|||
from pathlib import Path
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from ..documents.base import Document
|
||||
from ..base import Document
|
||||
from .base import BaseDocumentStore
|
||||
|
||||
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
from .base import Document
|
||||
|
||||
__all__ = ["Document"]
|
|
@ -6,8 +6,7 @@ from typing import Type
|
|||
from langchain.schema.embeddings import Embeddings as LCEmbeddings
|
||||
from theflow import Param
|
||||
|
||||
from ..base import BaseComponent
|
||||
from ..documents.base import Document
|
||||
from ..base import BaseComponent, Document
|
||||
|
||||
|
||||
class BaseEmbeddings(BaseComponent):
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
from typing import List
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from kotaemon.documents.base import Document
|
||||
|
||||
|
||||
class LLMInterface(Document):
|
||||
candidates: List[str] = Field(default_factory=list)
|
||||
completion_tokens: int = -1
|
||||
total_tokens: int = -1
|
||||
prompt_tokens: int = -1
|
||||
logits: List[List[float]] = Field(default_factory=list)
|
|
@ -8,7 +8,7 @@ from langchain.schema.messages import BaseMessage, HumanMessage
|
|||
from theflow.base import Param
|
||||
|
||||
from ...base import BaseComponent
|
||||
from ..base import LLMInterface
|
||||
from ...base.schema import LLMInterface
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ from langchain.llms.base import BaseLLM
|
|||
from theflow.base import Param
|
||||
|
||||
from ...base import BaseComponent
|
||||
from ..base import LLMInterface
|
||||
from ...base.schema import LLMInterface
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -4,8 +4,7 @@ from typing import Any, List, Type, Union
|
|||
from llama_index import SimpleDirectoryReader, download_loader
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from ..base import BaseComponent
|
||||
from ..documents.base import Document
|
||||
from ..base import BaseComponent, Document
|
||||
|
||||
|
||||
class AutoReader(BaseComponent):
|
||||
|
|
|
@ -8,7 +8,7 @@ from typing import Any, List, Optional, Union
|
|||
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from kotaemon.documents import Document
|
||||
from kotaemon.base import Document
|
||||
|
||||
|
||||
class PandasExcelReader(BaseReader):
|
||||
|
|
|
@ -8,7 +8,7 @@ import requests
|
|||
from langchain.utils import get_from_dict_or_env
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from kotaemon.documents import Document
|
||||
from kotaemon.base import Document
|
||||
|
||||
from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ from uuid import uuid4
|
|||
import requests
|
||||
from llama_index.readers.base import BaseReader
|
||||
|
||||
from kotaemon.documents import Document
|
||||
from kotaemon.base import Document
|
||||
|
||||
from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured
|
||||
from .utils.table import strip_special_chars_markdown
|
||||
|
|
|
@ -4,9 +4,7 @@ from llama_index.node_parser import SimpleNodeParser as LISimpleNodeParser
|
|||
from llama_index.node_parser.interface import NodeParser
|
||||
from llama_index.text_splitter import TokenTextSplitter
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
|
||||
from ..documents.base import Document
|
||||
from ..base import BaseComponent, Document
|
||||
|
||||
__all__ = ["TokenTextSplitter"]
|
||||
|
||||
|
|
|
@ -5,9 +5,8 @@ from pathlib import Path
|
|||
|
||||
from theflow import Node, Param
|
||||
|
||||
from ..base import BaseComponent
|
||||
from ..base import BaseComponent, Document
|
||||
from ..docstores import BaseDocumentStore
|
||||
from ..documents.base import Document
|
||||
from ..embeddings import BaseEmbeddings
|
||||
from ..vectorstores import BaseVectorStore
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@ from theflow import Node
|
|||
from theflow.utils.modules import ObjectInitDeclaration as _
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from kotaemon.base.schema import RetrievedDocument
|
||||
from kotaemon.docstores import InMemoryDocumentStore
|
||||
from kotaemon.documents.base import RetrievedDocument
|
||||
from kotaemon.embeddings import AzureOpenAIEmbeddings
|
||||
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
||||
from kotaemon.pipelines.agents import BaseAgent
|
||||
|
|
|
@ -5,8 +5,8 @@ from pathlib import Path
|
|||
from theflow import Node, Param
|
||||
|
||||
from ..base import BaseComponent
|
||||
from ..base.schema import Document, RetrievedDocument
|
||||
from ..docstores import BaseDocumentStore
|
||||
from ..documents.base import Document, RetrievedDocument
|
||||
from ..embeddings import BaseEmbeddings
|
||||
from ..vectorstores import BaseVectorStore
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ from typing import Any, AnyStr, Optional, Type, Union
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from kotaemon.documents.base import Document
|
||||
from kotaemon.base import Document
|
||||
|
||||
from .base import BaseTool
|
||||
|
||||
|
|
|
@ -5,16 +5,8 @@ from typing import Callable
|
|||
|
||||
from theflow import Param
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from kotaemon.documents.base import Document
|
||||
|
||||
|
||||
class ExtractorOutput(Document):
|
||||
"""
|
||||
Represents the output of an extractor.
|
||||
"""
|
||||
|
||||
matches: list[str]
|
||||
from kotaemon.base import BaseComponent, Document
|
||||
from kotaemon.base.schema import ExtractorOutput
|
||||
|
||||
|
||||
class RegexExtractor(BaseComponent):
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from typing import Callable, Union
|
||||
|
||||
from kotaemon.base import BaseComponent
|
||||
from kotaemon.documents.base import Document
|
||||
from kotaemon.base import BaseComponent, Document
|
||||
from kotaemon.prompt.template import PromptTemplate
|
||||
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ from llama_index.vector_stores.types import BasePydanticVectorStore
|
|||
from llama_index.vector_stores.types import VectorStore as LIVectorStore
|
||||
from llama_index.vector_stores.types import VectorStoreQuery
|
||||
|
||||
from ..documents.base import Document
|
||||
from ..base import Document
|
||||
|
||||
|
||||
class BaseVectorStore(ABC):
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import pytest
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.docstores import InMemoryDocumentStore
|
||||
from kotaemon.documents.base import Document
|
||||
|
||||
|
||||
def test_simple_document_store_base_interfaces(tmp_path):
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from kotaemon.documents.base import Document, RetrievedDocument
|
||||
from kotaemon.base.schema import Document, RetrievedDocument
|
||||
|
||||
from .conftest import skip_when_haystack_not_installed
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@ from typing import cast
|
|||
import pytest
|
||||
from openai.resources.embeddings import Embeddings
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.docstores import InMemoryDocumentStore
|
||||
from kotaemon.documents.base import Document
|
||||
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
|
||||
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
|
||||
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
|
||||
|
|
|
@ -4,7 +4,7 @@ from langchain.chat_models import AzureChatOpenAI as AzureChatOpenAILC
|
|||
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
|
||||
from openai.types.chat.chat_completion import ChatCompletion
|
||||
|
||||
from kotaemon.llms.base import LLMInterface
|
||||
from kotaemon.base.schema import LLMInterface
|
||||
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
||||
|
||||
_openai_chat_completion_response = ChatCompletion.parse_obj(
|
||||
|
|
|
@ -4,7 +4,7 @@ from langchain.llms import AzureOpenAI as AzureOpenAILC
|
|||
from langchain.llms import OpenAI as OpenAILC
|
||||
from openai.types.completion import Completion
|
||||
|
||||
from kotaemon.llms.base import LLMInterface
|
||||
from kotaemon.base.schema import LLMInterface
|
||||
from kotaemon.llms.completions.openai import AzureOpenAI, OpenAI
|
||||
|
||||
_openai_completion_response = Completion.parse_obj(
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import pytest
|
||||
|
||||
from kotaemon.documents.base import Document
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.post_processing.extractor import RegexExtractor
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import pytest
|
||||
|
||||
from kotaemon.documents.base import Document
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.post_processing.extractor import RegexExtractor
|
||||
from kotaemon.prompt.base import BasePromptComponent
|
||||
from kotaemon.prompt.template import PromptTemplate
|
||||
|
|
|
@ -3,7 +3,7 @@ from pathlib import Path
|
|||
from langchain.schema import Document as LangchainDocument
|
||||
from llama_index.node_parser import SimpleNodeParser
|
||||
|
||||
from kotaemon.documents.base import Document
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.loaders import AutoReader
|
||||
|
||||
|
||||
|
|
|
@ -4,8 +4,8 @@ from pathlib import Path
|
|||
import pytest
|
||||
from openai.resources.embeddings import Embeddings
|
||||
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.docstores import InMemoryDocumentStore
|
||||
from kotaemon.documents.base import Document
|
||||
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
|
||||
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
|
||||
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import json
|
||||
|
||||
from kotaemon.documents.base import Document
|
||||
from kotaemon.base import Document
|
||||
from kotaemon.vectorstores import ChromaVectorStore, InMemoryVectorStore
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user