Move Document and other interfaces into base/schema (#69)

This commit is contained in:
Nguyen Trung Duc (john) 2023-11-14 11:51:10 +07:00 committed by GitHub
parent 4704e2c11a
commit 8532138842
34 changed files with 51 additions and 63 deletions

View File

@ -1,3 +1,4 @@
from .component import BaseComponent
from .schema import Document
__all__ = ["BaseComponent"]
__all__ = ["BaseComponent", "Document"]

View File

@ -1,3 +1,5 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Optional, TypeVar
from llama_index.bridge.pydantic import Field
@ -72,3 +74,19 @@ class RetrievedDocument(Document):
score: float = Field(default=0.0)
retrieval_metadata: dict = Field(default={})
class LLMInterface(Document):
    """Document subclass that adds LLM-output fields.

    Every field declares a default, so none of them is required at
    construction time.
    """

    # Candidate strings; a fresh empty list is created per instance.
    candidates: list[str] = Field(default_factory=list)

    # Token counters. All three default to -1.
    # NOTE(review): -1 presumably means "not reported" -- confirm with callers.
    completion_tokens: int = -1
    total_tokens: int = -1
    prompt_tokens: int = -1

    # Nested lists of floats; a fresh empty list is created per instance.
    # NOTE(review): the meaning of the outer/inner axes is not visible here.
    logits: list[list[float]] = Field(default_factory=list)
class ExtractorOutput(Document):
    """Output of an extractor: a Document that also carries its matches."""

    # Declared without a default, so it must be supplied on construction.
    matches: list[str]

View File

@ -5,7 +5,7 @@ from langchain.schema.messages import AIMessage, SystemMessage
from theflow import Param, SessionCompose
from ..base import BaseComponent
from ..llms.base import LLMInterface
from ..base.schema import LLMInterface
from ..llms.chats.base import BaseMessage, HumanMessage

View File

@ -2,9 +2,8 @@ from typing import List, Optional
from theflow import Param
from kotaemon.base import BaseComponent
from kotaemon.base import BaseComponent, Document
from kotaemon.composite.linear import GatedLinearPipeline
from kotaemon.documents.base import Document
class SimpleBranchingPipeline(BaseComponent):

View File

@ -1,7 +1,7 @@
from typing import Any, Callable, Optional, Union
from kotaemon.base import BaseComponent
from kotaemon.documents.base import Document, IO_Type
from kotaemon.base.schema import Document, IO_Type
from kotaemon.llms.chats.base import ChatLLM
from kotaemon.llms.completions.base import LLM
from kotaemon.prompt.base import BasePromptComponent

View File

@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Optional, Union
from ..documents.base import Document
from ..base import Document
class BaseDocumentStore(ABC):

View File

@ -2,7 +2,7 @@ import json
from pathlib import Path
from typing import List, Optional, Union
from ..documents.base import Document
from ..base import Document
from .base import BaseDocumentStore

View File

@ -1,3 +0,0 @@
from .base import Document
__all__ = ["Document"]

View File

@ -6,8 +6,7 @@ from typing import Type
from langchain.schema.embeddings import Embeddings as LCEmbeddings
from theflow import Param
from ..base import BaseComponent
from ..documents.base import Document
from ..base import BaseComponent, Document
class BaseEmbeddings(BaseComponent):

View File

@ -1,13 +0,0 @@
from typing import List
from pydantic import Field
from kotaemon.documents.base import Document
class LLMInterface(Document):
candidates: List[str] = Field(default_factory=list)
completion_tokens: int = -1
total_tokens: int = -1
prompt_tokens: int = -1
logits: List[List[float]] = Field(default_factory=list)

View File

@ -8,7 +8,7 @@ from langchain.schema.messages import BaseMessage, HumanMessage
from theflow.base import Param
from ...base import BaseComponent
from ..base import LLMInterface
from ...base.schema import LLMInterface
logger = logging.getLogger(__name__)

View File

@ -5,7 +5,7 @@ from langchain.llms.base import BaseLLM
from theflow.base import Param
from ...base import BaseComponent
from ..base import LLMInterface
from ...base.schema import LLMInterface
logger = logging.getLogger(__name__)

View File

@ -4,8 +4,7 @@ from typing import Any, List, Type, Union
from llama_index import SimpleDirectoryReader, download_loader
from llama_index.readers.base import BaseReader
from ..base import BaseComponent
from ..documents.base import Document
from ..base import BaseComponent, Document
class AutoReader(BaseComponent):

View File

@ -8,7 +8,7 @@ from typing import Any, List, Optional, Union
from llama_index.readers.base import BaseReader
from kotaemon.documents import Document
from kotaemon.base import Document
class PandasExcelReader(BaseReader):

View File

@ -8,7 +8,7 @@ import requests
from langchain.utils import get_from_dict_or_env
from llama_index.readers.base import BaseReader
from kotaemon.documents import Document
from kotaemon.base import Document
from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown

View File

@ -5,7 +5,7 @@ from uuid import uuid4
import requests
from llama_index.readers.base import BaseReader
from kotaemon.documents import Document
from kotaemon.base import Document
from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured
from .utils.table import strip_special_chars_markdown

View File

@ -4,9 +4,7 @@ from llama_index.node_parser import SimpleNodeParser as LISimpleNodeParser
from llama_index.node_parser.interface import NodeParser
from llama_index.text_splitter import TokenTextSplitter
from kotaemon.base import BaseComponent
from ..documents.base import Document
from ..base import BaseComponent, Document
__all__ = ["TokenTextSplitter"]

View File

@ -5,9 +5,8 @@ from pathlib import Path
from theflow import Node, Param
from ..base import BaseComponent
from ..base import BaseComponent, Document
from ..docstores import BaseDocumentStore
from ..documents.base import Document
from ..embeddings import BaseEmbeddings
from ..vectorstores import BaseVectorStore

View File

@ -6,8 +6,8 @@ from theflow import Node
from theflow.utils.modules import ObjectInitDeclaration as _
from kotaemon.base import BaseComponent
from kotaemon.base.schema import RetrievedDocument
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.documents.base import RetrievedDocument
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.pipelines.agents import BaseAgent

View File

@ -5,8 +5,8 @@ from pathlib import Path
from theflow import Node, Param
from ..base import BaseComponent
from ..base.schema import Document, RetrievedDocument
from ..docstores import BaseDocumentStore
from ..documents.base import Document, RetrievedDocument
from ..embeddings import BaseEmbeddings
from ..vectorstores import BaseVectorStore

View File

@ -2,7 +2,7 @@ from typing import Any, AnyStr, Optional, Type, Union
from pydantic import BaseModel, Field
from kotaemon.documents.base import Document
from kotaemon.base import Document
from .base import BaseTool

View File

@ -5,16 +5,8 @@ from typing import Callable
from theflow import Param
from kotaemon.base import BaseComponent
from kotaemon.documents.base import Document
class ExtractorOutput(Document):
"""
Represents the output of an extractor.
"""
matches: list[str]
from kotaemon.base import BaseComponent, Document
from kotaemon.base.schema import ExtractorOutput
class RegexExtractor(BaseComponent):

View File

@ -1,7 +1,6 @@
from typing import Callable, Union
from kotaemon.base import BaseComponent
from kotaemon.documents.base import Document
from kotaemon.base import BaseComponent, Document
from kotaemon.prompt.template import PromptTemplate

View File

@ -6,7 +6,7 @@ from llama_index.vector_stores.types import BasePydanticVectorStore
from llama_index.vector_stores.types import VectorStore as LIVectorStore
from llama_index.vector_stores.types import VectorStoreQuery
from ..documents.base import Document
from ..base import Document
class BaseVectorStore(ABC):

View File

@ -1,7 +1,7 @@
import pytest
from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.documents.base import Document
def test_simple_document_store_base_interfaces(tmp_path):

View File

@ -1,4 +1,4 @@
from kotaemon.documents.base import Document, RetrievedDocument
from kotaemon.base.schema import Document, RetrievedDocument
from .conftest import skip_when_haystack_not_installed

View File

@ -5,8 +5,8 @@ from typing import cast
import pytest
from openai.resources.embeddings import Embeddings
from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.documents.base import Document
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline

View File

@ -4,7 +4,7 @@ from langchain.chat_models import AzureChatOpenAI as AzureChatOpenAILC
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
from openai.types.chat.chat_completion import ChatCompletion
from kotaemon.llms.base import LLMInterface
from kotaemon.base.schema import LLMInterface
from kotaemon.llms.chats.openai import AzureChatOpenAI
_openai_chat_completion_response = ChatCompletion.parse_obj(

View File

@ -4,7 +4,7 @@ from langchain.llms import AzureOpenAI as AzureOpenAILC
from langchain.llms import OpenAI as OpenAILC
from openai.types.completion import Completion
from kotaemon.llms.base import LLMInterface
from kotaemon.base.schema import LLMInterface
from kotaemon.llms.completions.openai import AzureOpenAI, OpenAI
_openai_completion_response = Completion.parse_obj(

View File

@ -1,6 +1,6 @@
import pytest
from kotaemon.documents.base import Document
from kotaemon.base import Document
from kotaemon.post_processing.extractor import RegexExtractor

View File

@ -1,6 +1,6 @@
import pytest
from kotaemon.documents.base import Document
from kotaemon.base import Document
from kotaemon.post_processing.extractor import RegexExtractor
from kotaemon.prompt.base import BasePromptComponent
from kotaemon.prompt.template import PromptTemplate

View File

@ -3,7 +3,7 @@ from pathlib import Path
from langchain.schema import Document as LangchainDocument
from llama_index.node_parser import SimpleNodeParser
from kotaemon.documents.base import Document
from kotaemon.base import Document
from kotaemon.loaders import AutoReader

View File

@ -4,8 +4,8 @@ from pathlib import Path
import pytest
from openai.resources.embeddings import Embeddings
from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.documents.base import Document
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline

View File

@ -1,6 +1,6 @@
import json
from kotaemon.documents.base import Document
from kotaemon.base import Document
from kotaemon.vectorstores import ChromaVectorStore, InMemoryVectorStore