Update docs (#106)

ian_Cin 2024-01-30 18:50:17 +07:00 committed by GitHub
parent cbe40fac99
commit 116919b346
57 changed files with 133 additions and 66 deletions

View File

@@ -3,9 +3,7 @@
 Quick and easy AI components to build Kotaemon - applicable in client
 project.
-## Documentation
-https://docs.promptui.dm.cinnamon.is
+[Documentation](https://docs.bleh-internal.cinnamon.is/)
 ## Install

View File

@@ -139,8 +139,8 @@ Examples: https://github.com/Cinnamon/kotaemon/pull/2
 - 1st line message is the PR title.
 - The text area is the PR description.
-![image](https://github.com/Cinnamon/kotaemon/assets/35283585/e2593010-d7ef-46e3-8719-6fcae0315b5d)
+![image](images/274787925-e2593010-d7ef-46e3-8719-6fcae0315b5d.png)
-![image](https://github.com/Cinnamon/kotaemon/assets/35283585/bfe6a117-85cd-4dd4-b432-197c791a9901)
+![image](images/274787941-bfe6a117-85cd-4dd4-b432-197c791a9901.png)
 ## Develop pipelines

Binary file added (image, 21 KiB); contents not shown.

Binary file added (image, 162 KiB); contents not shown.

Binary file added (image, 107 KiB); contents not shown.

Binary file added (image, 111 KiB); contents not shown.

View File

@@ -3,8 +3,17 @@
 `kotaemon` library focuses on the AI building blocks to implement the Kotaemon. It can be used in both client project and in product development. It consists of base interfaces, core components and a list of utilities:
 - Base interfaces: `kotaemon` defines the base interface of a component in a pipeline. A pipeline is also a component. By clearly define this interface, a pipeline of steps can be easily constructed and orchestrated.
-- Core components: `kotaemon` implements (or wraps 3rd-party libraries like Langchain, llama-index,... when possible) commonly used components in kotaemon use cases. Some of these components are: LLM, vector store, document store, retriever... For a detailed list and description of these components, please refer to the [Pipeline Components](Pipeline-Components) and [Data & Data Structure Components](Data-&-Data-Structure-Components) sections.
-- List of utilities: `lib-knowledge` provides utilities and tools that are usually needed in client project. For example, it provides a prompt engineering UI for AI developers in a project to quickly create a prompt engineering tool for DMs and QALs. It also provides a command to quickly spin up a project code base. For a full list and description of these utilities, please refer to the [Utilities](Utilities) section.
+- Core components: `kotaemon` implements (or wraps 3rd-party libraries
+like Langchain, llama-index,... when possible) commonly used components in
+kotaemon use cases. Some of these components are: LLM, vector store,
+document store, retriever... For a detailed list and description of these
+components, please refer to the [API Reference](/reference/nav/) section.
+- List of utilities: `lib-knowledge` provides utilities and tools that are
+usually needed in client project. For example, it provides a prompt
+engineering UI for AI developers in a project to quickly create a prompt
+engineering tool for DMs and QALs. It also provides a command to quickly spin
+up a project code base. For a full list and description of these utilities,
+please refer to the [Tutorial/Utilities](/ultilities) section.
 ```mermaid
 mindmap
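
To make the "base interfaces" bullet above concrete, here is a minimal, hypothetical sketch; it assumes only what this diff already shows (`BaseComponent` and `Document` are importable from `kotaemon.base`, and a component implements `run()`), and the class name is made up:

```python
from kotaemon.base import BaseComponent, Document


class UpperCase(BaseComponent):
    """Hypothetical component: upper-cases the text of a Document."""

    def run(self, doc: Document) -> Document:
        return Document(text=doc.text.upper())


if __name__ == "__main__":
    step = UpperCase()
    # Because a pipeline is also a component, `step` could itself be nested
    # inside a larger BaseComponent in exactly the same way.
    print(step.run(Document(text="hello kotaemon")).text)  # HELLO KOTAEMON
```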

View File

@@ -13,6 +13,8 @@ while doc_dir.name != doc_dir_name and doc_dir != doc_dir.parent:
 if doc_dir == doc_dir.parent:
     raise ValueError(f"root_name ({doc_dir_name}) not in path ({str(Path(__file__))}).")

+nav_title_map = {"cli": "CLI", "llms": "LLMs"}

 def generate_docs_for_src_code(
     code_dir: Path, target_doc_folder: str, ignored_modules: Iterable[Any] = []
@@ -53,7 +55,9 @@ def generate_docs_for_src_code(
         if ignore:
             continue
-        nav_titles = [name.replace("_", " ").title() for name in parts]
+        nav_titles = [
+            nav_title_map.get(name, name.replace("_", " ").title()) for name in parts
+        ]
         nav[nav_titles] = doc_path.as_posix()
         with mkdocs_gen_files.open(full_doc_path, "w") as f:
@@ -69,7 +73,7 @@ def generate_docs_for_src_code(
 generate_docs_for_src_code(
-    code_dir=doc_dir.parent / "libs" / "kotaemon",
+    code_dir=doc_dir.parent / "libs" / "kotaemon" / "kotaemon",
     target_doc_folder="reference",
     ignored_modules={"contribs"},
 )
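
The practical effect of `nav_title_map` is easiest to see in isolation. A small, self-contained sketch of the title logic used in the hunk above (the module names are just examples):

```python
nav_title_map = {"cli": "CLI", "llms": "LLMs"}


def nav_title(name: str) -> str:
    # Explicit overrides for acronyms; otherwise "snake_case" -> "Snake Case".
    return nav_title_map.get(name, name.replace("_", " ").title())


print(nav_title("llms"))           # LLMs (was "Llms" before this change)
print(nav_title("cli"))            # CLI (was "Cli")
print(nav_title("vector_stores"))  # Vector Stores (unchanged behaviour)
```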

View File

@@ -2,7 +2,7 @@ Utilities detail can be referred in the sub-pages of this section.
 ## Prompt engineering UI
-![chat-ui](https://github.com/Cinnamon/kotaemon/assets/35283585/ac8f9aac-d853-4571-a48b-d866a99eaf3e)
+![chat-ui](images/271332562-ac8f9aac-d853-4571-a48b-d866a99eaf3e.png)
 **_Important:_** despite the name prompt engineering UI, this tool allows DMs to test any kind of parameters that are exposed by AIRs. Prompt is one kind of param. There can be other type of params that DMs can tweak (e.g. top_k, temperature...).
@@ -146,7 +146,7 @@ $ kh promptui run <path/to/config/file.yml>
 This will generate an UI as follow:
-![Screenshot from 2023-09-20 12-20-31](https://github.com/Cinnamon/kotaemon/assets/35283585/9ac1b95a-b667-42e7-b318-98a1b805d6df)
+![Screenshot from 2023-09-20 12-20-31](images/269170198-9ac1b95a-b667-42e7-b318-98a1b805d6df.png)
 where:

View File

@@ -5,9 +5,10 @@ from dataclasses import dataclass
 from enum import Enum
 from typing import Any, Dict, Literal, NamedTuple, Optional, Union
-from kotaemon.base import LLMInterface
 from pydantic import Extra
+from kotaemon.base import LLMInterface

 def check_log():
     """

View File

@@ -1,10 +1,11 @@
 from typing import List, Optional
-from kotaemon.llms import LLM, ChatLLM
 from langchain.agents import AgentType as LCAgentType
 from langchain.agents import initialize_agent
 from langchain.agents.agent import AgentExecutor as LCAgentExecutor
+from kotaemon.llms import LLM, ChatLLM
 from .base import BaseAgent
 from .io import AgentOutput, AgentType
 from .tools import BaseTool

View File

@@ -1,9 +1,10 @@
 from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
-from kotaemon.base import BaseComponent
 from langchain.agents import Tool as LCTool
 from pydantic import BaseModel
+from kotaemon.base import BaseComponent

 class ToolException(Exception):
     """An optional exception that tool throws when execution error occurs.

View File

@@ -1,8 +1,9 @@
 from typing import AnyStr, Optional, Type
-from kotaemon.llms import BaseLLM
 from pydantic import BaseModel, Field
+from kotaemon.llms import BaseLLM
 from .base import BaseTool, ToolException

View File

@@ -1,8 +1,9 @@
 from typing import Any, AnyStr, Optional, Type, Union
-from kotaemon.base import Document
 from pydantic import BaseModel, Field
+from kotaemon.base import Document
 from .base import BaseTool

View File

@@ -1,9 +1,10 @@
 from abc import abstractmethod
 from typing import Iterator, Optional
-from kotaemon.base.schema import Document
 from theflow import Function, Node, Param, lazy
+from kotaemon.base.schema import Document

 class BaseComponent(Function):
     """A component is a class that can be used to compose a pipeline.

View File

@@ -1,9 +1,10 @@
 from abc import abstractmethod
 from typing import List, Optional
+from theflow import SessionFunction
 from kotaemon.base import BaseComponent, LLMInterface
 from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage
-from theflow import SessionFunction

 class BaseChatBot(BaseComponent):

View File

@@ -36,9 +36,10 @@ def export(export_path, output):
     """Export a pipeline to a config file"""
     import sys

-    from kotaemon.contribs.promptui.config import export_pipeline_to_config
     from theflow.utils.modules import import_dotted_string
+    from kotaemon.contribs.promptui.config import export_pipeline_to_config

     sys.path.append(os.getcwd())
     cls = import_dotted_string(export_path, safe=False)
     export_pipeline_to_config(cls, output)

View File

@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Any, Dict, Optional, Type, Union
 import yaml
+
 from kotaemon.base import BaseComponent
 from kotaemon.chatbot import BaseChatBot

View File

@@ -6,10 +6,11 @@ from typing import Any, Dict, List, Type, Union
 import pandas as pd
 import yaml
-from kotaemon.base import BaseComponent
 from theflow.storage import storage
 from theflow.utils.modules import import_dotted_string
+from kotaemon.base import BaseComponent
 from .logs import ResultLog

View File

@@ -3,11 +3,12 @@ from datetime import datetime
 from pathlib import Path
 import gradio as gr
+from theflow.storage import storage
 from kotaemon.chatbot import ChatConversation
 from kotaemon.contribs.promptui.base import get_component
 from kotaemon.contribs.promptui.export import export
 from kotaemon.contribs.promptui.ui.blocks import ChatBlock
-from theflow.storage import storage
 from ..logs import ResultLog

View File

@@ -6,9 +6,10 @@ from typing import Any, Dict
 import gradio as gr
 import pandas as pd
+from theflow.storage import storage
 from kotaemon.contribs.promptui.base import get_component
 from kotaemon.contribs.promptui.export import export
-from theflow.storage import storage
 from ..logs import ResultLog

View File

@@ -3,9 +3,10 @@ from __future__ import annotations
 from abc import abstractmethod
 from typing import Any, Type
-from kotaemon.base import BaseComponent, Document, RetrievedDocument
 from llama_index.node_parser.interface import NodeParser
+from kotaemon.base import BaseComponent, Document, RetrievedDocument

 class DocTransformer(BaseComponent):
     """This is a base class for document transformers

View File

@@ -1,5 +1,7 @@
 from pathlib import Path
+from llama_index.readers.base import BaseReader
 from kotaemon.base import BaseComponent, Document, Param
 from kotaemon.indices.extractors import BaseDocParser
 from kotaemon.indices.splitters import BaseSplitter, TokenSplitter
@@ -11,7 +13,6 @@ from kotaemon.loaders import (
     PandasExcelReader,
     UnstructuredReader,
 )
-from llama_index.readers.base import BaseReader

 class DocumentIngestor(BaseComponent):

View File

@@ -1,9 +1,10 @@
 from typing import Iterator, List
+from pydantic import BaseModel, Field
 from kotaemon.base import BaseComponent
 from kotaemon.base.schema import HumanMessage, SystemMessage
 from kotaemon.llms import BaseLLM
-from pydantic import BaseModel, Field

 class FactWithEvidence(BaseModel):

View File

@@ -2,9 +2,10 @@ from __future__ import annotations
 from concurrent.futures import ThreadPoolExecutor
+from langchain.output_parsers.boolean import BooleanOutputParser
 from kotaemon.base import Document
 from kotaemon.llms import BaseLLM, PromptTemplate
-from langchain.output_parsers.boolean import BooleanOutputParser
 from .base import BaseReranking

View File

@@ -1,6 +1,7 @@
-from kotaemon.base import BaseComponent
 from langchain_core.language_models.base import BaseLanguageModel
+from kotaemon.base import BaseComponent

 class BaseLLM(BaseComponent):
     def to_langchain_format(self) -> BaseLanguageModel:

View File

@@ -156,6 +156,7 @@ class GatedBranchingPipeline(SimpleBranchingPipeline):
 if __name__ == "__main__":
     import dotenv
+
     from kotaemon.llms import AzureChatOpenAI, BasePromptComponent
     from kotaemon.parsers import RegexExtractor

View File

@@ -1,9 +1,10 @@
 from copy import deepcopy
 from typing import Callable, List
-from kotaemon.base import BaseComponent, Document
 from theflow import Function, Node, Param
+from kotaemon.base import BaseComponent, Document
 from .chats import AzureChatOpenAI
 from .completions import LLM
 from .prompts import BasePromptComponent
@@ -84,7 +85,7 @@ class Thought(BaseComponent):
     @Node.auto(depends_on="prompt")
     def prompt_template(self):
         """Automatically wrap around param prompt. Can ignore"""
-        return BasePromptComponent(self.prompt)
+        return BasePromptComponent(template=self.prompt)

     def run(self, **kwargs) -> Document:
         """Run the chain of thought"""

View File

@@ -1,4 +1,6 @@
-from typing import Callable, Union
+from typing import Callable
+from theflow import Param
 from kotaemon.base import BaseComponent, Document
@@ -19,14 +21,18 @@ class BasePromptComponent(BaseComponent):
     middleware_switches = {"theflow.middleware.CachingMiddleware": False}
     allow_extra = True

-    def __init__(self, template: Union[str, PromptTemplate], **kwargs):
-        super().__init__()
-        self.template = (
-            template
-            if isinstance(template, PromptTemplate)
-            else PromptTemplate(template)
+    template: str | PromptTemplate
+
+    @Param.auto(depends_on="template")
+    def template__(self):
+        return (
+            self.template
+            if isinstance(self.template, PromptTemplate)
+            else PromptTemplate(self.template)
         )
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
         self.__set(**kwargs)

     def __check_redundant_kwargs(self, **kwargs):
@@ -42,7 +48,7 @@ class BasePromptComponent(BaseComponent):
         Returns:
             None
         """
-        self.template.check_redundant_kwargs(**kwargs)
+        self.template__.check_redundant_kwargs(**kwargs)

     def __check_unset_placeholders(self):
         """
@@ -58,7 +64,7 @@ class BasePromptComponent(BaseComponent):
         Returns:
             None
         """
-        self.template.check_missing_kwargs(**self.__dict__)
+        self.template__.check_missing_kwargs(**self.__dict__)

     def __validate_value_type(self, **kwargs):
         """
@@ -76,6 +82,8 @@ class BasePromptComponent(BaseComponent):
         """
         type_error = []
         for k, v in kwargs.items():
+            if k.startswith("template"):
+                continue
             if not isinstance(v, (str, int, Document, Callable)):  # type: ignore
                 type_error.append((k, type(v)))
@@ -122,7 +130,7 @@ class BasePromptComponent(BaseComponent):
         )

         kwargs = {}
-        for k in self.template.placeholders:
+        for k in self.template__.placeholders:
             v = getattr(self, k)

             # if get a callable, execute to get its output
@@ -141,7 +149,7 @@ class BasePromptComponent(BaseComponent):
         return kwargs

-    def set(self, **kwargs):
+    def set_value(self, **kwargs):
         """
         Similar to `__set` but for external use.
@@ -172,7 +180,7 @@ class BasePromptComponent(BaseComponent):
         self.__check_unset_placeholders()

         prepared_kwargs = self.__prepare_value()
-        text = self.template.populate(**prepared_kwargs)
+        text = self.template__.populate(**prepared_kwargs)

         return Document(text=text, metadata={"origin": "PromptComponent"})

     def flow(self):
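
Taken together, this file's changes alter how `BasePromptComponent` is built and used: `template` becomes a declared field resolved through a `Param.auto` hook (`template__`), and the public `set` method is renamed to `set_value`. A minimal usage sketch, based on the hunks above and the updated test later in this diff; the exact behaviour of `run()` without arguments is assumed:

```python
from kotaemon.llms import BasePromptComponent, PromptTemplate

# `template` is now a declared field, so it is passed as a keyword argument.
prompt = BasePromptComponent(template=PromptTemplate("Hello, {name}!"))

# `set` is renamed to `set_value`; placeholder values are stored as attributes.
prompt.set_value(name="Alice")
assert prompt.name == "Alice"

# run() fills the template from those attributes and wraps the result in a
# Document (per `Document(text=text, metadata={"origin": "PromptComponent"})`).
doc = prompt.run()
print(doc.text)  # Hello, Alice!
```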

View File

@@ -1,10 +1,11 @@
 from pathlib import Path
 from typing import Any, List, Type, Union
-from kotaemon.base import BaseComponent, Document
 from llama_index import SimpleDirectoryReader, download_loader
 from llama_index.readers.base import BaseReader
+from kotaemon.base import BaseComponent, Document

 class AutoReader(BaseComponent):
     """General auto reader for a variety of files. (based on llama-hub)"""

View File

@@ -6,9 +6,10 @@ Pandas parser for .xlsx files.
 from pathlib import Path
 from typing import Any, List, Optional, Union
-from kotaemon.base import Document
 from llama_index.readers.base import BaseReader
+from kotaemon.base import Document

 class PandasExcelReader(BaseReader):
     r"""Pandas-based CSV parser.

View File

@@ -5,10 +5,11 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional
 import requests
-from kotaemon.base import Document
 from langchain.utils import get_from_dict_or_env
 from llama_index.readers.base import BaseReader
+from kotaemon.base import Document
 from .utils.table import parse_markdown_text_to_tables, strip_special_chars_markdown

View File

@@ -3,9 +3,10 @@ from typing import List, Optional
 from uuid import uuid4
 import requests
-from kotaemon.base import Document
 from llama_index.readers.base import BaseReader
+from kotaemon.base import Document
 from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured
 from .utils.table import strip_special_chars_markdown

View File

@@ -12,9 +12,10 @@ pip install xlrd
 from pathlib import Path
 from typing import Any, Dict, List, Optional
-from kotaemon.base import Document
 from llama_index.readers.base import BaseReader
+from kotaemon.base import Document

 class UnstructuredReader(BaseReader):
     """General unstructured text reader for a variety of files."""

View File

@@ -3,12 +3,13 @@ from __future__ import annotations
 from abc import ABC, abstractmethod
 from typing import Any, Optional
-from kotaemon.base import DocumentWithEmbedding
 from llama_index.schema import NodeRelationship, RelatedNodeInfo
 from llama_index.vector_stores.types import BasePydanticVectorStore
 from llama_index.vector_stores.types import VectorStore as LIVectorStore
 from llama_index.vector_stores.types import VectorStoreQuery
+from kotaemon.base import DocumentWithEmbedding

 class BaseVectorStore(ABC):
     @abstractmethod

View File

@@ -3,10 +3,11 @@ from pathlib import Path
 from typing import Any, Optional, Type
 import fsspec
-from kotaemon.base import DocumentWithEmbedding
 from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore
 from llama_index.vector_stores.simple import SimpleVectorStoreData
+from kotaemon.base import DocumentWithEmbedding
 from .base import LlamaIndexVectorStore

View File

@@ -1,6 +1,8 @@
 from unittest.mock import patch
 import pytest
+from openai.types.chat.chat_completion import ChatCompletion
 from kotaemon.agents import (
     AgentType,
     BaseTool,
@@ -12,7 +14,6 @@ from kotaemon.agents import (
     WikipediaTool,
 )
 from kotaemon.llms import AzureChatOpenAI
-from openai.types.chat.chat_completion import ChatCompletion

 FINAL_RESPONSE_TEXT = "Final Answer: Hello Cinnamon AI!"
 REWOO_VALID_PLAN = (

View File

@@ -1,6 +1,8 @@
 from copy import deepcopy
 import pytest
+from openai.types.chat.chat_completion import ChatCompletion
 from kotaemon.llms import (
     AzureChatOpenAI,
     BasePromptComponent,
@@ -10,7 +12,6 @@ from kotaemon.llms import (
     SimpleLinearPipeline,
 )
 from kotaemon.parsers import RegexExtractor
-from openai.types.chat.chat_completion import ChatCompletion

 _openai_chat_completion_response = ChatCompletion.parse_obj(
     {

View File

@@ -1,8 +1,9 @@
 from unittest.mock import patch
+from openai.types.chat.chat_completion import ChatCompletion
 from kotaemon.llms import AzureChatOpenAI
 from kotaemon.llms.cot import ManualSequentialChainOfThought, Thought
-from openai.types.chat.chat_completion import ChatCompletion

 _openai_chat_completion_response = [
     ChatCompletion.parse_obj(

View File

@@ -3,6 +3,7 @@ from unittest.mock import patch
 import pytest
 from elastic_transport import ApiResponseMeta
+
 from kotaemon.base import Document
 from kotaemon.storages import (
     ElasticsearchDocumentStore,

View File

@@ -3,11 +3,12 @@ from pathlib import Path
 from typing import cast
 import pytest
+from openai.resources.embeddings import Embeddings
 from kotaemon.base import Document
 from kotaemon.embeddings import AzureOpenAIEmbeddings
 from kotaemon.indices import VectorIndexing, VectorRetrieval
 from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
-from openai.resources.embeddings import Embeddings

 with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
     openai_embedding = json.load(f)

View File

@@ -9,6 +9,7 @@ try:
 except ImportError:
     from langchain.llms import AzureOpenAI as AzureOpenAILC
     from langchain.llms import OpenAI as OpenAILC
+
 from openai.types.completion import Completion

 _openai_completion_response = Completion.parse_obj(

View File

@@ -1,4 +1,5 @@
 import pytest
+
 from kotaemon.base import Document
 from kotaemon.parsers import RegexExtractor

View File

@@ -1,4 +1,5 @@
 import pytest
+
 from kotaemon.base import Document
 from kotaemon.llms import BasePromptComponent, PromptTemplate
 from kotaemon.parsers import RegexExtractor
@@ -58,5 +59,5 @@ def test_run():
 def test_set_method():
     template = PromptTemplate("Hello, {name}!")
     prompt = BasePromptComponent(template=template)
-    prompt.set(name="Alice")
+    prompt.set_value(name="Alice")
     assert prompt.name == "Alice"

View File

@@ -1,9 +1,10 @@
 from pathlib import Path
+from langchain.schema import Document as LangchainDocument
+from llama_index.node_parser import SimpleNodeParser
 from kotaemon.base import Document
 from kotaemon.loaders import AutoReader, UnstructuredReader
-from langchain.schema import Document as LangchainDocument
-from llama_index.node_parser import SimpleNodeParser

 def test_pdf_reader():

View File

@@ -1,10 +1,11 @@
 from unittest.mock import patch
 import pytest
+from openai.types.chat.chat_completion import ChatCompletion
 from kotaemon.base import Document
 from kotaemon.indices.rankings import LLMReranking
 from kotaemon.llms import AzureChatOpenAI
-from openai.types.chat.chat_completion import ChatCompletion

 _openai_chat_completion_responses = [
     ChatCompletion.parse_obj(

View File

@@ -1,6 +1,7 @@
+from llama_index.schema import NodeRelationship
 from kotaemon.base import Document
 from kotaemon.indices.splitters import TokenSplitter
-from llama_index.schema import NodeRelationship

 source1 = Document(
     content="The City Hall and Raffles Place MRT stations are paired cross-platform "

View File

@@ -2,6 +2,7 @@ import json
 from pathlib import Path
 import pytest
+
 from kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader

 input_file = Path(__file__).parent / "resources" / "table.pdf"

View File

@@ -51,6 +51,7 @@ def test_disable_telemetry_import_haystack_after_kotaemon():
     import os

     import haystack.telemetry
+
     import kotaemon  # noqa: F401

     assert haystack.telemetry.telemetry is None

View File

@@ -1,4 +1,5 @@
 import pytest
+
 from kotaemon.llms import PromptTemplate

View File

@@ -2,12 +2,13 @@ import json
 from pathlib import Path
 import pytest
+from openai.resources.embeddings import Embeddings
 from kotaemon.agents.tools import ComponentTool, GoogleSearchTool, WikipediaTool
 from kotaemon.base import Document
 from kotaemon.embeddings import AzureOpenAIEmbeddings
 from kotaemon.indices.vectorindex import VectorIndexing, VectorRetrieval
 from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
-from openai.resources.embeddings import Embeddings

 with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
     openai_embedding = json.load(f)

View File

@@ -4,10 +4,11 @@ from unittest.mock import patch
 import pytest
 from index import ReaderIndexingPipeline
-from kotaemon.llms import AzureChatOpenAI
 from openai.resources.embeddings import Embeddings
 from openai.types.chat.chat_completion import ChatCompletion
+from kotaemon.llms import AzureChatOpenAI

 with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
     openai_embedding = json.load(f)

View File

@@ -3,11 +3,12 @@ import logging
 from functools import cache
 from pathlib import Path
-from kotaemon.base import BaseComponent
-from kotaemon.storages import BaseDocumentStore, BaseVectorStore
 from theflow.settings import settings
 from theflow.utils.modules import deserialize
+from kotaemon.base import BaseComponent
+from kotaemon.storages import BaseDocumentStore, BaseVectorStore

 logger = logging.getLogger(__name__)

View File

@@ -17,10 +17,6 @@ from ktem.components import (
 from ktem.db.models import Index, Source, SourceTargetRelation, engine
 from ktem.indexing.base import BaseIndexing, BaseRetriever
 from ktem.indexing.exceptions import FileExistsError
-from kotaemon.base import RetrievedDocument
-from kotaemon.indices import VectorIndexing, VectorRetrieval
-from kotaemon.indices.ingests import DocumentIngestor
-from kotaemon.indices.rankings import BaseReranking, CohereReranking, LLMReranking
 from llama_index.vector_stores import (
     FilterCondition,
     FilterOperator,
@@ -31,6 +27,11 @@ from llama_index.vector_stores.types import VectorStoreQueryMode
 from sqlmodel import Session, select
 from theflow.settings import settings
+
+from kotaemon.base import RetrievedDocument
+from kotaemon.indices import VectorIndexing, VectorRetrieval
+from kotaemon.indices.ingests import DocumentIngestor
+from kotaemon.indices.rankings import BaseReranking, CohereReranking, LLMReranking

 USER_SETTINGS = {
     "index_parser": {
         "name": "Index parser",

View File

@@ -6,6 +6,7 @@ from functools import partial
 import tiktoken
 from ktem.components import llms
 from ktem.indexing.base import BaseRetriever
+
 from kotaemon.base import (
     BaseComponent,
     Document,

View File

@@ -60,7 +60,7 @@ plugins:
             - "!^_"
           members_order: source
           separate_signature: true
-          paths: [libs/kotaemon]
+          paths: [libs/kotaemon/kotaemon]
   - git-revision-date-localized:
       enable_creation_date: true
       type: timeago

View File

@@ -4,3 +4,6 @@ skip = "*.js,*.css,*.map"
 ignore-words-list = "llm,fo"
 quiet-level = 3
 check-filenames = ""
+
+[tool.isort]
+known_first_party = ["kotaemon"]
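
This new `[tool.isort]` section is what drives the import reshuffling throughout the diff: declaring `kotaemon` as first-party makes isort move `from kotaemon...` imports out of the alphabetically sorted third-party block into their own section below it. A before/after sketch using imports taken from one of the files above:

```python
# Before: "kotaemon" was sorted alphabetically among third-party packages.
# from kotaemon.base import BaseComponent
# from langchain.agents import Tool as LCTool
# from pydantic import BaseModel

# After isort with known_first_party = ["kotaemon"]:
from langchain.agents import Tool as LCTool  # third-party section
from pydantic import BaseModel

from kotaemon.base import BaseComponent  # first-party section
```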