Move Document and other interfaces into base/schema (#69)

This commit is contained in:
Nguyen Trung Duc (john) 2023-11-14 11:51:10 +07:00 committed by GitHub
parent 4704e2c11a
commit 8532138842
34 changed files with 51 additions and 63 deletions

View File

@ -1,3 +1,4 @@
from .component import BaseComponent from .component import BaseComponent
from .schema import Document
__all__ = ["BaseComponent"] __all__ = ["BaseComponent", "Document"]

View File

@ -1,3 +1,5 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Optional, TypeVar from typing import TYPE_CHECKING, Any, Optional, TypeVar
from llama_index.bridge.pydantic import Field from llama_index.bridge.pydantic import Field
@ -72,3 +74,19 @@ class RetrievedDocument(Document):
score: float = Field(default=0.0) score: float = Field(default=0.0)
retrieval_metadata: dict = Field(default={}) retrieval_metadata: dict = Field(default={})
class LLMInterface(Document):
candidates: list[str] = Field(default_factory=list)
completion_tokens: int = -1
total_tokens: int = -1
prompt_tokens: int = -1
logits: list[list[float]] = Field(default_factory=list)
class ExtractorOutput(Document):
"""
Represents the output of an extractor.
"""
matches: list[str]

View File

@ -5,7 +5,7 @@ from langchain.schema.messages import AIMessage, SystemMessage
from theflow import Param, SessionCompose from theflow import Param, SessionCompose
from ..base import BaseComponent from ..base import BaseComponent
from ..llms.base import LLMInterface from ..base.schema import LLMInterface
from ..llms.chats.base import BaseMessage, HumanMessage from ..llms.chats.base import BaseMessage, HumanMessage

View File

@ -2,9 +2,8 @@ from typing import List, Optional
from theflow import Param from theflow import Param
from kotaemon.base import BaseComponent from kotaemon.base import BaseComponent, Document
from kotaemon.composite.linear import GatedLinearPipeline from kotaemon.composite.linear import GatedLinearPipeline
from kotaemon.documents.base import Document
class SimpleBranchingPipeline(BaseComponent): class SimpleBranchingPipeline(BaseComponent):

View File

@ -1,7 +1,7 @@
from typing import Any, Callable, Optional, Union from typing import Any, Callable, Optional, Union
from kotaemon.base import BaseComponent from kotaemon.base import BaseComponent
from kotaemon.documents.base import Document, IO_Type from kotaemon.base.schema import Document, IO_Type
from kotaemon.llms.chats.base import ChatLLM from kotaemon.llms.chats.base import ChatLLM
from kotaemon.llms.completions.base import LLM from kotaemon.llms.completions.base import LLM
from kotaemon.prompt.base import BasePromptComponent from kotaemon.prompt.base import BasePromptComponent

View File

@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
from pathlib import Path from pathlib import Path
from typing import List, Optional, Union from typing import List, Optional, Union
from ..documents.base import Document from ..base import Document
class BaseDocumentStore(ABC): class BaseDocumentStore(ABC):

View File

@ -2,7 +2,7 @@ import json
from pathlib import Path from pathlib import Path
from typing import List, Optional, Union from typing import List, Optional, Union
from ..documents.base import Document from ..base import Document
from .base import BaseDocumentStore from .base import BaseDocumentStore

View File

@ -1,3 +0,0 @@
from .base import Document
__all__ = ["Document"]

View File

@ -6,8 +6,7 @@ from typing import Type
from langchain.schema.embeddings import Embeddings as LCEmbeddings from langchain.schema.embeddings import Embeddings as LCEmbeddings
from theflow import Param from theflow import Param
from ..base import BaseComponent from ..base import BaseComponent, Document
from ..documents.base import Document
class BaseEmbeddings(BaseComponent): class BaseEmbeddings(BaseComponent):

View File

@ -1,13 +0,0 @@
from typing import List
from pydantic import Field
from kotaemon.documents.base import Document
class LLMInterface(Document):
candidates: List[str] = Field(default_factory=list)
completion_tokens: int = -1
total_tokens: int = -1
prompt_tokens: int = -1
logits: List[List[float]] = Field(default_factory=list)

View File

@ -8,7 +8,7 @@ from langchain.schema.messages import BaseMessage, HumanMessage
from theflow.base import Param from theflow.base import Param
from ...base import BaseComponent from ...base import BaseComponent
from ..base import LLMInterface from ...base.schema import LLMInterface
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@ -5,7 +5,7 @@ from langchain.llms.base import BaseLLM
from theflow.base import Param from theflow.base import Param
from ...base import BaseComponent from ...base import BaseComponent
from ..base import LLMInterface from ...base.schema import LLMInterface
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@ -4,8 +4,7 @@ from typing import Any, List, Type, Union
from llama_index import SimpleDirectoryReader, download_loader from llama_index import SimpleDirectoryReader, download_loader
from llama_index.readers.base import BaseReader from llama_index.readers.base import BaseReader
from ..base import BaseComponent from ..base import BaseComponent, Document
from ..documents.base import Document
class AutoReader(BaseComponent): class AutoReader(BaseComponent):

View File

@ -8,7 +8,7 @@ from typing import Any, List, Optional, Union
from llama_index.readers.base import BaseReader from llama_index.readers.base import BaseReader
from kotaemon.documents import Document from kotaemon.base import Document
class PandasExcelReader(BaseReader): class PandasExcelReader(BaseReader):

View File

@ -8,7 +8,7 @@ import requests
from langchain.utils import get_from_dict_or_env from langchain.utils import get_from_dict_or_env
from llama_index.readers.base import BaseReader from llama_index.readers.base import BaseReader
from kotaemon.documents import Document from kotaemon.base import Document
from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown

View File

@ -5,7 +5,7 @@ from uuid import uuid4
import requests import requests
from llama_index.readers.base import BaseReader from llama_index.readers.base import BaseReader
from kotaemon.documents import Document from kotaemon.base import Document
from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured
from .utils.table import strip_special_chars_markdown from .utils.table import strip_special_chars_markdown

View File

@ -4,9 +4,7 @@ from llama_index.node_parser import SimpleNodeParser as LISimpleNodeParser
from llama_index.node_parser.interface import NodeParser from llama_index.node_parser.interface import NodeParser
from llama_index.text_splitter import TokenTextSplitter from llama_index.text_splitter import TokenTextSplitter
from kotaemon.base import BaseComponent from ..base import BaseComponent, Document
from ..documents.base import Document
__all__ = ["TokenTextSplitter"] __all__ = ["TokenTextSplitter"]

View File

@ -5,9 +5,8 @@ from pathlib import Path
from theflow import Node, Param from theflow import Node, Param
from ..base import BaseComponent from ..base import BaseComponent, Document
from ..docstores import BaseDocumentStore from ..docstores import BaseDocumentStore
from ..documents.base import Document
from ..embeddings import BaseEmbeddings from ..embeddings import BaseEmbeddings
from ..vectorstores import BaseVectorStore from ..vectorstores import BaseVectorStore

View File

@ -6,8 +6,8 @@ from theflow import Node
from theflow.utils.modules import ObjectInitDeclaration as _ from theflow.utils.modules import ObjectInitDeclaration as _
from kotaemon.base import BaseComponent from kotaemon.base import BaseComponent
from kotaemon.base.schema import RetrievedDocument
from kotaemon.docstores import InMemoryDocumentStore from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.documents.base import RetrievedDocument
from kotaemon.embeddings import AzureOpenAIEmbeddings from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.llms.chats.openai import AzureChatOpenAI from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.pipelines.agents import BaseAgent from kotaemon.pipelines.agents import BaseAgent

View File

@ -5,8 +5,8 @@ from pathlib import Path
from theflow import Node, Param from theflow import Node, Param
from ..base import BaseComponent from ..base import BaseComponent
from ..base.schema import Document, RetrievedDocument
from ..docstores import BaseDocumentStore from ..docstores import BaseDocumentStore
from ..documents.base import Document, RetrievedDocument
from ..embeddings import BaseEmbeddings from ..embeddings import BaseEmbeddings
from ..vectorstores import BaseVectorStore from ..vectorstores import BaseVectorStore

View File

@ -2,7 +2,7 @@ from typing import Any, AnyStr, Optional, Type, Union
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from kotaemon.documents.base import Document from kotaemon.base import Document
from .base import BaseTool from .base import BaseTool

View File

@ -5,16 +5,8 @@ from typing import Callable
from theflow import Param from theflow import Param
from kotaemon.base import BaseComponent from kotaemon.base import BaseComponent, Document
from kotaemon.documents.base import Document from kotaemon.base.schema import ExtractorOutput
class ExtractorOutput(Document):
"""
Represents the output of an extractor.
"""
matches: list[str]
class RegexExtractor(BaseComponent): class RegexExtractor(BaseComponent):

View File

@ -1,7 +1,6 @@
from typing import Callable, Union from typing import Callable, Union
from kotaemon.base import BaseComponent from kotaemon.base import BaseComponent, Document
from kotaemon.documents.base import Document
from kotaemon.prompt.template import PromptTemplate from kotaemon.prompt.template import PromptTemplate

View File

@ -6,7 +6,7 @@ from llama_index.vector_stores.types import BasePydanticVectorStore
from llama_index.vector_stores.types import VectorStore as LIVectorStore from llama_index.vector_stores.types import VectorStore as LIVectorStore
from llama_index.vector_stores.types import VectorStoreQuery from llama_index.vector_stores.types import VectorStoreQuery
from ..documents.base import Document from ..base import Document
class BaseVectorStore(ABC): class BaseVectorStore(ABC):

View File

@ -1,7 +1,7 @@
import pytest import pytest
from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.documents.base import Document
def test_simple_document_store_base_interfaces(tmp_path): def test_simple_document_store_base_interfaces(tmp_path):

View File

@ -1,4 +1,4 @@
from kotaemon.documents.base import Document, RetrievedDocument from kotaemon.base.schema import Document, RetrievedDocument
from .conftest import skip_when_haystack_not_installed from .conftest import skip_when_haystack_not_installed

View File

@ -5,8 +5,8 @@ from typing import cast
import pytest import pytest
from openai.resources.embeddings import Embeddings from openai.resources.embeddings import Embeddings
from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.documents.base import Document
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline

View File

@ -4,7 +4,7 @@ from langchain.chat_models import AzureChatOpenAI as AzureChatOpenAILC
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
from openai.types.chat.chat_completion import ChatCompletion from openai.types.chat.chat_completion import ChatCompletion
from kotaemon.llms.base import LLMInterface from kotaemon.base.schema import LLMInterface
from kotaemon.llms.chats.openai import AzureChatOpenAI from kotaemon.llms.chats.openai import AzureChatOpenAI
_openai_chat_completion_response = ChatCompletion.parse_obj( _openai_chat_completion_response = ChatCompletion.parse_obj(

View File

@ -4,7 +4,7 @@ from langchain.llms import AzureOpenAI as AzureOpenAILC
from langchain.llms import OpenAI as OpenAILC from langchain.llms import OpenAI as OpenAILC
from openai.types.completion import Completion from openai.types.completion import Completion
from kotaemon.llms.base import LLMInterface from kotaemon.base.schema import LLMInterface
from kotaemon.llms.completions.openai import AzureOpenAI, OpenAI from kotaemon.llms.completions.openai import AzureOpenAI, OpenAI
_openai_completion_response = Completion.parse_obj( _openai_completion_response = Completion.parse_obj(

View File

@ -1,6 +1,6 @@
import pytest import pytest
from kotaemon.documents.base import Document from kotaemon.base import Document
from kotaemon.post_processing.extractor import RegexExtractor from kotaemon.post_processing.extractor import RegexExtractor

View File

@ -1,6 +1,6 @@
import pytest import pytest
from kotaemon.documents.base import Document from kotaemon.base import Document
from kotaemon.post_processing.extractor import RegexExtractor from kotaemon.post_processing.extractor import RegexExtractor
from kotaemon.prompt.base import BasePromptComponent from kotaemon.prompt.base import BasePromptComponent
from kotaemon.prompt.template import PromptTemplate from kotaemon.prompt.template import PromptTemplate

View File

@ -3,7 +3,7 @@ from pathlib import Path
from langchain.schema import Document as LangchainDocument from langchain.schema import Document as LangchainDocument
from llama_index.node_parser import SimpleNodeParser from llama_index.node_parser import SimpleNodeParser
from kotaemon.documents.base import Document from kotaemon.base import Document
from kotaemon.loaders import AutoReader from kotaemon.loaders import AutoReader

View File

@ -4,8 +4,8 @@ from pathlib import Path
import pytest import pytest
from openai.resources.embeddings import Embeddings from openai.resources.embeddings import Embeddings
from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.documents.base import Document
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline

View File

@ -1,6 +1,6 @@
import json import json
from kotaemon.documents.base import Document from kotaemon.base import Document
from kotaemon.vectorstores import ChromaVectorStore, InMemoryVectorStore from kotaemon.vectorstores import ChromaVectorStore, InMemoryVectorStore