From c1b1371a68216e8fc8c2e1f37a9dec989dd8a562 Mon Sep 17 00:00:00 2001
From: ian
Date: Wed, 27 Mar 2024 19:04:48 +0700
Subject: [PATCH] enable config through .env

---
Review notes (patch commentary placed after the three-dash line; it is not
part of the commit message):

- .env: adds a settings template. OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT,
  AZURE_OPENAI_API_KEY, COHERE_API_KEY and LOCAL_MODEL are left empty.
  COHERE_API_KEY is not read by any hunk in this patch.
- embeddings/langchain_based.py: AzureOpenAIEmbeddings.__init__ renames the
  keyword `openai_api_version` to `api_version` and forwards it to
  super().__init__() under the new name.
  NOTE(review): callers that still pass `openai_api_version=` will now land
  in **params -- confirm the wrapped class accepts both spellings.
- llms/__init__.py and llms/chats/__init__.py: import ChatOpenAI and add it
  to __all__.
- llms/chats/langchain_based.py: adds ChatOpenAI(LCChatMixin, ChatLLM).
  _get_lc_class() imports ChatOpenAI from langchain_openai and falls back to
  langchain.chat_models on ImportError. The hunk also inserts one blank line
  after the AzureChatOpenAI class header.
- ktem/flowsettings.py: KH_LLMS and KH_EMBEDDINGS now start empty and are
  filled from config():
    * the "azure" entries require AZURE_OPENAI_API_KEY and
      AZURE_OPENAI_ENDPOINT plus the matching *_DEPLOYMENT value;
    * the "openai" LLM requires OPENAI_API_KEY;
    * the "local" LLM requires LOCAL_MODEL; only its truthiness is used, the
      endpoint URLs stay hard-coded to localhost:31415;
    * the "openai" and "local" embeddings are added only while KH_EMBEDDINGS
      is still empty, so at most one embeddings entry is ever registered;
    * every entry sets "default": False, and the "openai" entries carry no
      "accuracy"/"cost" keys;
    * when none of the variables is set, both dicts stay empty.
  NOTE(review): whether downstream code needs a non-empty dict or an entry
  flagged as default cannot be seen from this patch -- confirm.
  NOTE(review): the OPENAI_EMBEDDINGS_MODEL lookup supplies the same fallback
  twice (default=... and `or ...`).
- Layout: line breaks, indentation and blank context lines in this copy were
  restored from the hunk headers and the diffstat; treat the whitespace as
  best-effort.

 .env                                          |  18 +++
 .../kotaemon/embeddings/langchain_based.py    |   4 +-
 libs/kotaemon/kotaemon/llms/__init__.py       |   3 +-
 libs/kotaemon/kotaemon/llms/chats/__init__.py |   3 +-
 .../kotaemon/llms/chats/langchain_based.py    |  29 ++++
 libs/ktem/flowsettings.py                     | 146 +++++++++++-------
 6 files changed, 140 insertions(+), 63 deletions(-)
 create mode 100644 .env

diff --git a/.env b/.env
new file mode 100644
index 0000000..e033553
--- /dev/null
+++ b/.env
@@ -0,0 +1,18 @@
+# settings for OpenAI
+OPENAI_API_BASE=https://api.openai.com/v1
+OPENAI_API_KEY=
+OPENAI_CHAT_MODEL=gpt-3.5-turbo
+OPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002
+
+# settings for Azure OpenAI
+AZURE_OPENAI_ENDPOINT=
+AZURE_OPENAI_API_KEY=
+OPENAI_API_VERSION=2024-02-15-preview
+AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-35-turbo
+AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002
+
+# settings for Cohere
+COHERE_API_KEY=
+
+# settings for local models
+LOCAL_MODEL=
diff --git a/libs/kotaemon/kotaemon/embeddings/langchain_based.py b/libs/kotaemon/kotaemon/embeddings/langchain_based.py
index 9bd0e7b..14cb2a8 100644
--- a/libs/kotaemon/kotaemon/embeddings/langchain_based.py
+++ b/libs/kotaemon/kotaemon/embeddings/langchain_based.py
@@ -137,14 +137,14 @@ class AzureOpenAIEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
         azure_endpoint: Optional[str] = None,
         deployment: Optional[str] = None,
         openai_api_key: Optional[str] = None,
-        openai_api_version: Optional[str] = None,
+        api_version: Optional[str] = None,
         request_timeout: Optional[float] = None,
         **params,
     ):
         super().__init__(
             azure_endpoint=azure_endpoint,
             deployment=deployment,
-            openai_api_version=openai_api_version,
+            api_version=api_version,
             openai_api_key=openai_api_key,
             request_timeout=request_timeout,
             **params,
diff --git a/libs/kotaemon/kotaemon/llms/__init__.py b/libs/kotaemon/kotaemon/llms/__init__.py
index 4e81d21..d7547a6 100644
--- a/libs/kotaemon/kotaemon/llms/__init__.py
+++ b/libs/kotaemon/kotaemon/llms/__init__.py
@@ -2,7 +2,7 @@ from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMes
 
 from .base import BaseLLM
 from .branching import GatedBranchingPipeline, SimpleBranchingPipeline
-from .chats import AzureChatOpenAI, ChatLLM, EndpointChatLLM, LlamaCppChat
+from .chats import AzureChatOpenAI, ChatLLM, ChatOpenAI, EndpointChatLLM, LlamaCppChat
 from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
 from .cot import ManualSequentialChainOfThought, Thought
 from .linear import GatedLinearPipeline, SimpleLinearPipeline
@@ -17,6 +17,7 @@ __all__ = [
     "HumanMessage",
     "AIMessage",
     "SystemMessage",
+    "ChatOpenAI",
     "AzureChatOpenAI",
     "LlamaCppChat",
     # completion-specific components
diff --git a/libs/kotaemon/kotaemon/llms/chats/__init__.py b/libs/kotaemon/kotaemon/llms/chats/__init__.py
index 53d44b2..5b50317 100644
--- a/libs/kotaemon/kotaemon/llms/chats/__init__.py
+++ b/libs/kotaemon/kotaemon/llms/chats/__init__.py
@@ -1,11 +1,12 @@
 from .base import ChatLLM
 from .endpoint_based import EndpointChatLLM
-from .langchain_based import AzureChatOpenAI, LCChatMixin
+from .langchain_based import AzureChatOpenAI, ChatOpenAI, LCChatMixin
 from .llamacpp import LlamaCppChat
 
 __all__ = [
     "ChatLLM",
     "EndpointChatLLM",
+    "ChatOpenAI",
     "AzureChatOpenAI",
     "LCChatMixin",
     "LlamaCppChat",
diff --git a/libs/kotaemon/kotaemon/llms/chats/langchain_based.py b/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
index 14064ba..6c87c72 100644
--- a/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
+++ b/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
@@ -165,7 +165,36 @@ class LCChatMixin:
         raise ValueError(f"Invalid param {path}")
 
 
+class ChatOpenAI(LCChatMixin, ChatLLM):  # type: ignore
+    def __init__(
+        self,
+        openai_api_base: str | None = None,
+        openai_api_key: str | None = None,
+        model: str | None = None,
+        temperature: float = 0.7,
+        request_timeout: float | None = None,
+        **params,
+    ):
+        super().__init__(
+            openai_api_base=openai_api_base,
+            openai_api_key=openai_api_key,
+            model=model,
+            temperature=temperature,
+            request_timeout=request_timeout,
+            **params,
+        )
+
+    def _get_lc_class(self):
+        try:
+            from langchain_openai import ChatOpenAI
+        except ImportError:
+            from langchain.chat_models import ChatOpenAI
+
+        return ChatOpenAI
+
+
 class AzureChatOpenAI(LCChatMixin, ChatLLM):  # type: ignore
+
     def __init__(
         self,
         azure_endpoint: str | None = None,
diff --git a/libs/ktem/flowsettings.py b/libs/ktem/flowsettings.py
index 52ebf86..a3589fe 100644
--- a/libs/ktem/flowsettings.py
+++ b/libs/ktem/flowsettings.py
@@ -31,70 +31,98 @@ KH_VECTORSTORE = {
     "__type__": "kotaemon.storages.ChromaVectorStore",
     "path": str(user_cache_dir / "vectorstore"),
 }
-KH_LLMS = {
-    # example for using Azure OpenAI, the config variables can set as environment
-    # variables or in the .env file
-    # "gpt4": {
-    #     "def": {
-    #         "__type__": "kotaemon.llms.AzureChatOpenAI",
-    #         "temperature": 0,
-    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-    #         "openai_api_version": config("OPENAI_API_VERSION", default=""),
-    #         "deployment_name": "",
-    #         "stream": True,
-    #     },
-    #     "accuracy": 10,
-    #     "cost": 10,
-    #     "default": False,
-    # },
-    # "gpt35": {
-    #     "def": {
-    #         "__type__": "kotaemon.llms.AzureChatOpenAI",
-    #         "temperature": 0,
-    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-    #         "openai_api_version": config("OPENAI_API_VERSION", default=""),
-    #         "deployment_name": "",
-    #         "request_timeout": 10,
-    #         "stream": False,
-    #     },
-    #     "accuracy": 5,
-    #     "cost": 5,
-    #     "default": False,
-    # },
-    "local": {
+KH_LLMS = {}
+KH_EMBEDDINGS = {}
+
+# populate options from config
+if config("AZURE_OPENAI_API_KEY", default="") and config(
+    "AZURE_OPENAI_ENDPOINT", default=""
+):
+    if config("AZURE_OPENAI_CHAT_DEPLOYMENT", default=""):
+        KH_LLMS["azure"] = {
+            "def": {
+                "__type__": "kotaemon.llms.AzureChatOpenAI",
+                "temperature": 0,
+                "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
+                "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
+                "api_version": config("OPENAI_API_VERSION", default="")
+                or "2024-02-15-preview",
+                "deployment_name": config("AZURE_OPENAI_CHAT_DEPLOYMENT", default=""),
+                "request_timeout": 10,
+                "stream": False,
+            },
+            "default": False,
+            "accuracy": 5,
+            "cost": 5,
+        }
+    if config("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""):
+        KH_EMBEDDINGS["azure"] = {
+            "def": {
+                "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
+                "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
+                "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
+                "api_version": config("OPENAI_API_VERSION", default="")
+                or "2024-02-15-preview",
+                "deployment": config("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""),
+                "request_timeout": 10,
+                "chunk_size": 16,
+            },
+            "default": False,
+            "accuracy": 5,
+            "cost": 5,
+        }
+
+if config("OPENAI_API_KEY", default=""):
+    KH_LLMS["openai"] = {
+        "def": {
+            "__type__": "kotaemon.llms.ChatOpenAI",
+            "temperature": 0,
+            "openai_api_base": config("OPENAI_API_BASE", default="")
+            or "https://api.openai.com/v1",
+            "openai_api_key": config("OPENAI_API_KEY", default=""),
+            "model": config("OPENAI_CHAT_MODEL", default="") or "gpt-3.5-turbo",
+            "request_timeout": 10,
+            "stream": False,
+        },
+        "default": False,
+    }
+    if len(KH_EMBEDDINGS) < 1:
+        KH_EMBEDDINGS["openai"] = {
+            "def": {
+                "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
+                "openai_api_base": config("OPENAI_API_BASE", default="")
+                or "https://api.openai.com/v1",
+                "openai_api_key": config("OPENAI_API_KEY", default=""),
+                "model": config(
+                    "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-ada-002"
+                )
+                or "text-embedding-ada-002",
+                "request_timeout": 10,
+                "chunk_size": 16,
+            },
+            "default": False,
+        }
+
+if config("LOCAL_MODEL", default=""):
+    KH_LLMS["local"] = {
         "def": {
             "__type__": "kotaemon.llms.EndpointChatLLM",
             "endpoint_url": "http://localhost:31415/v1/chat/completions",
         },
         "default": False,
-    },
-}
-KH_EMBEDDINGS = {
-    # example for using Azure OpenAI, the config variables can set as environment
-    # variables or in the .env file
-    # "ada": {
-    #     "def": {
-    #         "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
-    #         "model": "text-embedding-ada-002",
-    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-    #         "deployment": "",
-    #         "chunk_size": 16,
-    #     },
-    #     "accuracy": 5,
-    #     "cost": 5,
-    #     "default": True,
-    # },
-    "local": {
-        "def": {
-            "__type__": "kotaemon.embeddings.EndpointEmbeddings",
-            "endpoint_url": "http://localhost:31415/v1/embeddings",
-        },
-        "default": False,
-    },
-}
+        "cost": 0,
+    }
+    if len(KH_EMBEDDINGS) < 1:
+        KH_EMBEDDINGS["local"] = {
+            "def": {
+                "__type__": "kotaemon.embeddings.EndpointEmbeddings",
+                "endpoint_url": "http://localhost:31415/v1/embeddings",
+            },
+            "default": False,
+            "cost": 0,
+        }
+
+
 KH_REASONINGS = ["ktem.reasoning.simple.FullQAPipeline"]