Compare commits
10 Commits: 6f4acc979c...d8309edefb

d8309edefb
17864a439a
37cdc28ceb
ec1f6abdc4
ffe766f24d
833982ac81
ddb5187293
5132288386
c33bedca9e
9b05693e4f
@@ -1,3 +1,4 @@
 # this is an example .env file, use it to create your own .env file and place it in the root of the project
+
 # settings for OpenAI
@@ -16,6 +17,12 @@ AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002
 # settings for Cohere
 COHERE_API_KEY=<COHERE_API_KEY>

+# settings for Mistral
+# MISTRAL_API_KEY=placeholder
+
+# settings for VoyageAI
+VOYAGE_API_KEY=<VOYAGE_API_KEY>
+
 # settings for local models
 LOCAL_MODEL=qwen2.5:7b
 LOCAL_MODEL_EMBEDDINGS=nomic-embed-text

@@ -172,6 +172,25 @@ if OPENAI_API_KEY:
         "default": IS_OPENAI_DEFAULT,
     }

+VOYAGE_API_KEY = config("VOYAGE_API_KEY", default="")
+if VOYAGE_API_KEY:
+    KH_EMBEDDINGS["voyageai"] = {
+        "spec": {
+            "__type__": "kotaemon.embeddings.VoyageAIEmbeddings",
+            "api_key": VOYAGE_API_KEY,
+            "model": config("VOYAGE_EMBEDDINGS_MODEL", default="voyage-3-large"),
+        },
+        "default": False,
+    }
+    KH_RERANKINGS["voyageai"] = {
+        "spec": {
+            "__type__": "kotaemon.rerankings.VoyageAIReranking",
+            "model_name": "rerank-2",
+            "api_key": VOYAGE_API_KEY,
+        },
+        "default": False,
+    }
+
 if config("LOCAL_MODEL", default=""):
     KH_LLMS["ollama"] = {
         "spec": {
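
Each entry registered above is a declarative spec: `__type__` names the class to instantiate, and the remaining keys become constructor arguments. A minimal sketch of how such a spec can be resolved (illustrative only, not the actual ktem loader; the key value is a placeholder):

    import importlib

    spec = {
        "__type__": "kotaemon.embeddings.VoyageAIEmbeddings",
        "api_key": "<VOYAGE_API_KEY>",
        "model": "voyage-3-large",
    }
    # split the dotted path into module and class name, then import it
    module_name, _, class_name = spec["__type__"].rpartition(".")
    cls = getattr(importlib.import_module(module_name), class_name)
    embedding = cls(**{k: v for k, v in spec.items() if k != "__type__"})
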
@ -243,6 +262,15 @@ KH_LLMS["cohere"] = {
|
||||||
},
|
},
|
||||||
"default": False,
|
"default": False,
|
||||||
}
|
}
|
||||||
|
KH_LLMS["mistral"] = {
|
||||||
|
"spec": {
|
||||||
|
"__type__": "kotaemon.llms.ChatOpenAI",
|
||||||
|
"base_url": "https://api.mistral.ai/v1",
|
||||||
|
"model": "ministral-8b-latest",
|
||||||
|
"api_key": config("MISTRAL_API_KEY", default="your-key"),
|
||||||
|
},
|
||||||
|
"default": False,
|
||||||
|
}
|
||||||
|
|
||||||
# additional embeddings configurations
|
# additional embeddings configurations
|
||||||
KH_EMBEDDINGS["cohere"] = {
|
KH_EMBEDDINGS["cohere"] = {
|
||||||
|
@ -262,6 +290,14 @@ KH_EMBEDDINGS["google"] = {
|
||||||
},
|
},
|
||||||
"default": not IS_OPENAI_DEFAULT,
|
"default": not IS_OPENAI_DEFAULT,
|
||||||
}
|
}
|
||||||
|
KH_EMBEDDINGS["mistral"] = {
|
||||||
|
"spec": {
|
||||||
|
"__type__": "kotaemon.embeddings.LCMistralEmbeddings",
|
||||||
|
"model": "mistral-embed",
|
||||||
|
"api_key": config("MISTRAL_API_KEY", default="your-key"),
|
||||||
|
},
|
||||||
|
"default": False,
|
||||||
|
}
|
||||||
# KH_EMBEDDINGS["huggingface"] = {
|
# KH_EMBEDDINGS["huggingface"] = {
|
||||||
# "spec": {
|
# "spec": {
|
||||||
# "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings",
|
# "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings",
|
||||||
|
@@ -343,7 +379,7 @@ GRAPHRAG_INDICES = [
         "config": {
             "supported_file_types": (
                 ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
-                ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
+                ".pptx, .csv, .html, .mhtml, .txt, .md, .zip, .json"
             ),
             "private": True,
         },

@@ -8,6 +8,7 @@ from .schema import (
     HumanMessage,
     LLMInterface,
     RetrievedDocument,
+    StructuredOutputLLMInterface,
     SystemMessage,
 )

@@ -21,6 +22,7 @@ __all__ = [
     "HumanMessage",
     "RetrievedDocument",
     "LLMInterface",
+    "StructuredOutputLLMInterface",
     "ExtractorOutput",
     "Param",
     "Node",

@@ -143,6 +143,11 @@ class LLMInterface(AIMessage):
     logprobs: list[float] = []


+class StructuredOutputLLMInterface(LLMInterface):
+    parsed: Any
+    refusal: str = ""
+
+
 class ExtractorOutput(Document):
     """
     Represents the output of an extractor.

@@ -6,10 +6,12 @@ from .langchain_based import (
     LCCohereEmbeddings,
     LCGoogleEmbeddings,
     LCHuggingFaceEmbeddings,
+    LCMistralEmbeddings,
     LCOpenAIEmbeddings,
 )
 from .openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
 from .tei_endpoint_embed import TeiEndpointEmbeddings
+from .voyageai import VoyageAIEmbeddings

 __all__ = [
     "BaseEmbeddings",
@@ -20,7 +22,9 @@ __all__ = [
     "LCCohereEmbeddings",
     "LCHuggingFaceEmbeddings",
     "LCGoogleEmbeddings",
+    "LCMistralEmbeddings",
     "OpenAIEmbeddings",
     "AzureOpenAIEmbeddings",
     "FastEmbedEmbeddings",
+    "VoyageAIEmbeddings",
 ]

@@ -254,3 +254,40 @@ class LCGoogleEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
             raise ImportError("Please install langchain-google-genai")

         return GoogleGenerativeAIEmbeddings
+
+
+class LCMistralEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
+    """Wrapper around LangChain's MistralAI embedding, focusing on key parameters"""
+
+    api_key: str = Param(
+        help="API key (https://console.mistral.ai/api-keys)",
+        default=None,
+        required=True,
+    )
+    model: str = Param(
+        help="Model name to use ('mistral-embed')",
+        default="mistral-embed",
+        required=True,
+    )
+
+    def __init__(
+        self,
+        model: str = "mistral-embed",
+        api_key: Optional[str] = None,
+        **params,
+    ):
+        super().__init__(
+            model=model,
+            api_key=api_key,
+            **params,
+        )
+
+    def _get_lc_class(self):
+        try:
+            from langchain_mistralai import MistralAIEmbeddings
+        except ImportError:
+            raise ImportError(
+                "Please install langchain_mistralai: "
+                "`pip install -U langchain_mistralai`"
+            )
+        return MistralAIEmbeddings
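
A minimal usage sketch for the new wrapper (assumes `langchain_mistralai` is installed; the key is a placeholder; the call style follows the embedding tests later in this diff):

    from kotaemon.embeddings import LCMistralEmbeddings

    embedding = LCMistralEmbeddings(model="mistral-embed", api_key="<MISTRAL_API_KEY>")
    docs = embedding("Hello World")  # returns embedded document(s)
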
libs/kotaemon/kotaemon/embeddings/voyageai.py (new file, 66 lines)
@@ -0,0 +1,66 @@
+"""Implements embeddings from [Voyage AI](https://voyageai.com).
+"""
+
+import importlib
+
+from kotaemon.base import Document, DocumentWithEmbedding, Param
+
+from .base import BaseEmbeddings
+
+vo = None
+
+
+def _import_voyageai():
+    global vo
+    if not vo:
+        vo = importlib.import_module("voyageai")
+    return vo
+
+
+def _format_output(texts: list[str], embeddings: list[list]):
+    """Formats the output of all `.embed` calls.
+
+    Args:
+        texts: List of original documents
+        embeddings: Embeddings corresponding to each document
+    """
+    return [
+        DocumentWithEmbedding(content=text, embedding=embedding)
+        for text, embedding in zip(texts, embeddings)
+    ]
+
+
+class VoyageAIEmbeddings(BaseEmbeddings):
+    """Voyage AI provides best-in-class embedding models and rerankers."""
+
+    api_key: str = Param(None, help="Voyage API key", required=False)
+    model: str = Param(
+        "voyage-3",
+        help=(
+            "Model name to use. The Voyage "
+            "[documentation](https://docs.voyageai.com/docs/embeddings) "
+            "provides a list of all available embedding models."
+        ),
+        required=True,
+    )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        if not self.api_key:
+            raise ValueError("API key must be provided for VoyageAIEmbeddings.")
+
+        self._client = _import_voyageai().Client(api_key=self.api_key)
+        self._aclient = _import_voyageai().AsyncClient(api_key=self.api_key)
+
+    def invoke(
+        self, text: str | list[str] | Document | list[Document], *args, **kwargs
+    ) -> list[DocumentWithEmbedding]:
+        texts = [t.content for t in self.prepare_input(text)]
+        embeddings = self._client.embed(texts, model=self.model).embeddings
+        return _format_output(texts, embeddings)
+
+    async def ainvoke(
+        self, text: str | list[str] | Document | list[Document], *args, **kwargs
+    ) -> list[DocumentWithEmbedding]:
+        texts = [t.content for t in self.prepare_input(text)]
+        # parentheses needed: await the coroutine before reading .embeddings
+        embeddings = (await self._aclient.embed(texts, model=self.model)).embeddings
+        return _format_output(texts, embeddings)
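
A minimal usage sketch for the new class (assumes the `voyageai` package is installed; key and texts are placeholders):

    from kotaemon.embeddings import VoyageAIEmbeddings

    embedding = VoyageAIEmbeddings(api_key="<VOYAGE_API_KEY>", model="voyage-3")
    docs = embedding(["first document", "second document"])
    print(len(docs), len(docs[0].embedding))
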
@@ -168,7 +168,7 @@ class VectorRetrieval(BaseRetrieval):
         if self.retrieval_mode == "vector":
             emb = self.embedding(text)[0].embedding
             _, scores, ids = self.vector_store.query(
-                embedding=emb, top_k=top_k_first_round, **kwargs
+                embedding=emb, top_k=top_k_first_round, doc_ids=scope, **kwargs
             )
             docs = self.doc_store.get(ids)
             result = [
@@ -197,7 +197,7 @@ class VectorRetrieval(BaseRetrieval):

             assert self.doc_store is not None
             _, vs_scores, vs_ids = self.vector_store.query(
-                embedding=emb, top_k=top_k_first_round, **kwargs
+                embedding=emb, top_k=top_k_first_round, doc_ids=scope, **kwargs
             )
             if vs_ids:
                 vs_docs = self.doc_store.get(vs_ids)

@@ -14,6 +14,7 @@ from .chats import (
     LCGeminiChat,
     LCOllamaChat,
     LlamaCppChat,
+    StructuredOutputChatOpenAI,
 )
 from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
 from .cot import ManualSequentialChainOfThought, Thought
@@ -31,6 +32,7 @@ __all__ = [
     "SystemMessage",
     "AzureChatOpenAI",
     "ChatOpenAI",
+    "StructuredOutputChatOpenAI",
     "LCAnthropicChat",
     "LCGeminiChat",
     "LCCohereChat",

@@ -10,7 +10,7 @@ from .langchain_based import (
     LCOllamaChat,
 )
 from .llamacpp import LlamaCppChat
-from .openai import AzureChatOpenAI, ChatOpenAI
+from .openai import AzureChatOpenAI, ChatOpenAI, StructuredOutputChatOpenAI

 __all__ = [
     "ChatOpenAI",
@@ -18,6 +18,7 @@ __all__ = [
     "ChatLLM",
     "EndpointChatLLM",
     "ChatOpenAI",
+    "StructuredOutputChatOpenAI",
     "LCAnthropicChat",
     "LCGeminiChat",
     "LCCohereChat",

@@ -1,8 +1,16 @@
-from typing import TYPE_CHECKING, AsyncGenerator, Iterator, Optional
+from typing import TYPE_CHECKING, AsyncGenerator, Iterator, Optional, Type

+from pydantic import BaseModel
 from theflow.utils.modules import import_dotted_string

-from kotaemon.base import AIMessage, BaseMessage, HumanMessage, LLMInterface, Param
+from kotaemon.base import (
+    AIMessage,
+    BaseMessage,
+    HumanMessage,
+    LLMInterface,
+    Param,
+    StructuredOutputLLMInterface,
+)

 from .base import ChatLLM

@@ -330,6 +338,88 @@ class ChatOpenAI(BaseChatOpenAI):
         return await client.chat.completions.create(**params)


+class StructuredOutputChatOpenAI(ChatOpenAI):
+    """OpenAI chat model that returns structured output"""
+
+    response_schema: Type[BaseModel] = Param(
+        help="class that subclasses pydantic's BaseModel", required=True
+    )
+
+    def prepare_output(self, resp: dict) -> StructuredOutputLLMInterface:
+        """Convert the OpenAI response into StructuredOutputLLMInterface"""
+        additional_kwargs = {}
+
+        if "tool_calls" in resp["choices"][0]["message"]:
+            additional_kwargs["tool_calls"] = resp["choices"][0]["message"][
+                "tool_calls"
+            ]
+
+        if resp["choices"][0].get("logprobs") is None:
+            logprobs = []
+        else:
+            all_logprobs = resp["choices"][0]["logprobs"].get("content")
+            logprobs = (
+                [logprob["logprob"] for logprob in all_logprobs] if all_logprobs else []
+            )
+
+        output = StructuredOutputLLMInterface(
+            parsed=resp["choices"][0]["message"]["parsed"],
+            candidates=[(_["message"]["content"] or "") for _ in resp["choices"]],
+            content=resp["choices"][0]["message"]["content"] or "",
+            total_tokens=resp["usage"]["total_tokens"],
+            prompt_tokens=resp["usage"]["prompt_tokens"],
+            completion_tokens=resp["usage"]["completion_tokens"],
+            messages=[
+                AIMessage(content=(_["message"]["content"]) or "")
+                for _ in resp["choices"]
+            ],
+            additional_kwargs=additional_kwargs,
+            logprobs=logprobs,
+        )
+
+        return output
+
+    def prepare_params(self, **kwargs):
+        if "tools_pydantic" in kwargs:
+            kwargs.pop("tools_pydantic")
+
+        params_ = {
+            "model": self.model,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+            "n": self.n,
+            "stop": self.stop,
+            "frequency_penalty": self.frequency_penalty,
+            "presence_penalty": self.presence_penalty,
+            "tool_choice": self.tool_choice,
+            "tools": self.tools,
+            "logprobs": self.logprobs,
+            "logit_bias": self.logit_bias,
+            "top_logprobs": self.top_logprobs,
+            "top_p": self.top_p,
+            "response_format": self.response_schema,
+        }
+        params = {k: v for k, v in params_.items() if v is not None}
+        params.update(kwargs)
+
+        # doesn't do streaming
+        params.pop("stream")
+
+        return params
+
+    def openai_response(self, client, **kwargs):
+        """Get the openai response"""
+        params = self.prepare_params(**kwargs)
+
+        return client.beta.chat.completions.parse(**params)
+
+    async def aopenai_response(self, client, **kwargs):
+        """Get the openai response"""
+        params = self.prepare_params(**kwargs)
+
+        return await client.beta.chat.completions.parse(**params)
+
+
 class AzureChatOpenAI(BaseChatOpenAI):
     """OpenAI chat model provided by Microsoft Azure"""
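
A minimal usage sketch for the structured-output model (the schema, model name, and key below are illustrative; `response_schema` and the `parsed` field come from the class above):

    from pydantic import BaseModel

    from kotaemon.llms import StructuredOutputChatOpenAI


    class Citation(BaseModel):
        title: str
        year: int


    llm = StructuredOutputChatOpenAI(
        api_key="<OPENAI_API_KEY>",
        model="gpt-4o-mini",
        response_schema=Citation,
    )
    output = llm("Extract the citation: 'Attention Is All You Need, 2017'")
    print(output.parsed)  # a Citation instance parsed by the OpenAI SDK
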
@@ -1,5 +1,6 @@
 from .base import BaseReranking
 from .cohere import CohereReranking
 from .tei_fast_rerank import TeiFastReranking
+from .voyageai import VoyageAIReranking

-__all__ = ["BaseReranking", "TeiFastReranking", "CohereReranking"]
+__all__ = ["BaseReranking", "TeiFastReranking", "CohereReranking", "VoyageAIReranking"]

@@ -1,5 +1,7 @@
 from __future__ import annotations

+import os
+
 from decouple import config

 from kotaemon.base import Document, Param
@@ -23,6 +25,11 @@ class CohereReranking(BaseReranking):
         help="Cohere API key",
         required=True,
     )
+    base_url: str = Param(
+        None,
+        help="Rerank API base url. Default is https://api.cohere.com",
+        required=False,
+    )

     def run(self, documents: list[Document], query: str) -> list[Document]:
         """Use Cohere Reranker model to re-order documents
@@ -38,7 +45,9 @@ class CohereReranking(BaseReranking):
             print("Cohere API key not found. Skipping rerankings.")
             return documents

-        cohere_client = cohere.Client(self.cohere_api_key)
+        cohere_client = cohere.Client(
+            self.cohere_api_key, base_url=self.base_url or os.getenv("CO_API_URL")
+        )
         compressed_docs: list[Document] = []

         if not documents:  # to avoid empty api call
libs/kotaemon/kotaemon/rerankings/voyageai.py (new file, 63 lines)
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+import importlib
+
+from decouple import config
+
+from kotaemon.base import Document, Param
+
+from .base import BaseReranking
+
+vo = None
+
+
+def _import_voyageai():
+    global vo
+    if not vo:
+        vo = importlib.import_module("voyageai")
+    return vo
+
+
+class VoyageAIReranking(BaseReranking):
+    """VoyageAI Reranking model"""
+
+    model_name: str = Param(
+        "rerank-2",
+        help=(
+            "ID of the model to use. You can go to [Supported Models]"
+            "(https://docs.voyageai.com/docs/reranker) to see the supported models"
+        ),
+        required=True,
+    )
+    api_key: str = Param(
+        config("VOYAGE_API_KEY", ""),
+        help="VoyageAI API key",
+        required=True,
+    )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        if not self.api_key:
+            raise ValueError("API key must be provided for VoyageAIReranking.")
+
+        self._client = _import_voyageai().Client(api_key=self.api_key)
+        self._aclient = _import_voyageai().AsyncClient(api_key=self.api_key)
+
+    def run(self, documents: list[Document], query: str) -> list[Document]:
+        """Use VoyageAI Reranker model to re-order documents
+        with their relevance score"""
+        compressed_docs: list[Document] = []
+
+        if not documents:  # to avoid empty api call
+            return compressed_docs
+
+        _docs = [d.content for d in documents]
+        response = self._client.rerank(
+            model=self.model_name, query=query, documents=_docs
+        )
+        for r in response.results:
+            doc = documents[r.index]
+            doc.metadata["reranking_score"] = r.relevance_score
+            compressed_docs.append(doc)
+
+        return compressed_docs
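
A minimal usage sketch (assumes `voyageai` is installed; key and documents are placeholders):

    from kotaemon.base import Document
    from kotaemon.rerankings import VoyageAIReranking

    reranker = VoyageAIReranking(model_name="rerank-2", api_key="<VOYAGE_API_KEY>")
    ranked = reranker.run(
        documents=[Document(content="voyage rerankers"), Document(content="unrelated")],
        query="which reranker does Voyage provide?",
    )
    for doc in ranked:
        print(doc.metadata["reranking_score"], doc.content)
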
@@ -113,14 +113,18 @@ class LanceDBDocumentStore(BaseDocumentStore):
             )
         except (ValueError, FileNotFoundError):
             docs = []
-        return [
-            Document(
+        # return the documents using the order of original
+        # ids (which were ordered by score)
+        doc_dict = {
+            doc["id"]: Document(
                 id_=doc["id"],
                 text=doc["text"] if doc["text"] else "<empty>",
                 metadata=json.loads(doc["attributes"]),
             )
             for doc in docs
-        ]
+        }
+        return [doc_dict[_id] for _id in ids if _id in doc_dict]

     def delete(self, ids: Union[List[str], str], refresh_indices: bool = True):
         """Delete document by id"""
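
The dict-then-reorder change above restores the caller's score ordering regardless of the order LanceDB returns rows in; a tiny self-contained illustration with plain dicts:

    ids = ["b", "a"]  # ordered by relevance score
    rows = [{"id": "a"}, {"id": "b"}]  # arbitrary storage order
    by_id = {row["id"]: row for row in rows}
    ordered = [by_id[i] for i in ids if i in by_id]
    assert [r["id"] for r in ordered] == ["b", "a"]
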
@@ -36,6 +36,7 @@ dependencies = [
     "langchain-google-genai>=1.0.3,<2.0.0",
     "langchain-anthropic",
     "langchain-ollama",
+    "langchain-mistralai",
    "langchain-cohere>=0.2.4,<0.3.0",
    "llama-hub>=0.0.79,<0.1.0",
    "llama-index>=0.10.40,<0.11.0",
@@ -89,6 +90,7 @@ adv = [
    "tabulate",
    "unstructured>=0.15.8,<0.16",
    "wikipedia>=1.4.0,<1.5",
+    "voyageai>=0.3.0",
 ]
 dev = [
    "black",

@@ -70,6 +70,15 @@ def if_llama_cpp_not_installed():
         return False


+def if_voyageai_not_installed():
+    try:
+        import voyageai  # noqa: F401
+    except ImportError:
+        return True
+    else:
+        return False
+
+
 skip_when_haystack_not_installed = pytest.mark.skipif(
     if_haystack_not_installed(), reason="Haystack is not installed"
 )
@@ -97,3 +106,7 @@ skip_openai_lc_wrapper_test = pytest.mark.skipif(
 skip_llama_cpp_not_installed = pytest.mark.skipif(
     if_llama_cpp_not_installed(), reason="llama_cpp is not installed"
 )
+
+skip_when_voyageai_not_installed = pytest.mark.skipif(
+    if_voyageai_not_installed(), reason="voyageai is not installed"
+)

@@ -1,22 +1,24 @@
 import json
 from pathlib import Path
-from unittest.mock import patch
+from unittest.mock import Mock, patch

 from openai.types.create_embedding_response import CreateEmbeddingResponse

-from kotaemon.base import Document
+from kotaemon.base import Document, DocumentWithEmbedding
 from kotaemon.embeddings import (
     AzureOpenAIEmbeddings,
     FastEmbedEmbeddings,
     LCCohereEmbeddings,
     LCHuggingFaceEmbeddings,
     OpenAIEmbeddings,
+    VoyageAIEmbeddings,
 )

 from .conftest import (
     skip_when_cohere_not_installed,
     skip_when_fastembed_not_installed,
     skip_when_sentence_bert_not_installed,
+    skip_when_voyageai_not_installed,
 )

 with open(Path(__file__).parent / "resources" / "embedding_openai_batch.json") as f:
@@ -155,3 +157,16 @@ def test_fastembed_embeddings():
     model = FastEmbedEmbeddings()
     output = model("Hello World")
     assert_embedding_result(output)
+
+
+voyage_output_mock = Mock()
+voyage_output_mock.embeddings = [[1.0, 2.1, 3.2]]
+
+
+@skip_when_voyageai_not_installed
+@patch("voyageai.Client.embed", return_value=voyage_output_mock)
+@patch("voyageai.AsyncClient.embed", return_value=voyage_output_mock)
+def test_voyageai_embeddings(sync_call, async_call):
+    model = VoyageAIEmbeddings(api_key="test")
+    output = model("Hello, world!")
+    assert all(isinstance(doc, DocumentWithEmbedding) for doc in output)

@@ -39,7 +39,7 @@ class BaseApp:

     def __init__(self):
         self.dev_mode = getattr(settings, "KH_MODE", "") == "dev"
-        self.app_name = getattr(settings, "KH_APP_NAME", "Kotaemon")
+        self.app_name = getattr(settings, "KH_APP_NAME", "DatallChat")
         self.app_version = getattr(settings, "KH_APP_VERSION", "")
         self.f_user_management = getattr(settings, "KH_FEATURE_USER_MANAGEMENT", False)
         self._theme = KotaemonTheme()

@@ -1,7 +1,10 @@
 :root {
   --main-area-height: calc(100vh - 110px);
+  direction: rtl;
 }


+
 /* no footer */
 footer {
   display: none !important;
@@ -27,6 +30,11 @@ footer {
   height: 100% !important; */
 }

+input[type="radio"] {
+  margin-left: 5px;
+}
+
+.gradio-container
 /* styling for header bar */
 .header-bar {
   background-color: transparent;
@@ -168,6 +176,27 @@ mark {
   color: var(--body-text-color);
 }

+#chat-input textarea {
+  direction: rtl;
+}
+#chat-input button.submit-button {
+  margin-left: 3px;
+  margin-right: 3px;
+  transform: scaleX(-1);
+  -moz-transform: scaleX(-1);
+  -webkit-transform: scaleX(-1);
+}
+
+.secondary-wrap {
+  position: relative;
+}
+
+.secondary-wrap .icon-wrap {
+  /* direction: ltr; */
+  position: absolute;
+  right: 90%;
+}
+
 /* for setting right-aligned buttons */
 .right-button {
   min-width: 200px !important;
@@ -195,13 +224,13 @@ mark {
 #toggle-dark-button {
   position: fixed;
   top: 6px;
-  right: 30px;
+  left: 30px;
 }

 #info-expand-button {
   position: absolute;
   top: 6px;
-  right: 15px;
+  left: 30px;
 }

 /* prevent overflow of html info panel */
@@ -212,7 +241,7 @@ mark {
 #chat-expand-button {
   position: absolute;
   top: 6px;
-  right: -10px;
+  left: -10px;
   z-index: 1;
 }

@@ -231,14 +260,14 @@ mark {
   position: absolute;
   width: 110px;
   top: 10px;
-  right: 25px;
+  left: 15px;
 }

 #citation-dropdown {
   width: min(25%, 100px);
   position: absolute;
   top: 2px;
-  left: 120px;
+  right: 120px;
   height: 35px;
 }

@@ -377,9 +406,16 @@ pdfjs-viewer-element {

 /* Bot animation */
-.message.bot {
-  animation: fadein 1.0s ease-in-out forwards;
-}
+
+
+/* .message.bot button{
+  text-align: right;
+  background-color: blue;
+  direction: rtl !important;
+} */

 details.evidence {
   animation: fadein 0.3s ease-in-out forwards;

@@ -21,7 +21,7 @@ function run() {

     // setup conversation dropdown placeholder
     let conv_dropdown = document.querySelector("#conversation-dropdown input");
-    conv_dropdown.placeholder = "Browse conversation";
+    conv_dropdown.placeholder = "مرور گفتگو";

     // move info-expand-button
     let info_expand_button = document.getElementById("info-expand-button");

@@ -59,8 +59,10 @@ class EmbeddingManager:
             LCCohereEmbeddings,
             LCGoogleEmbeddings,
             LCHuggingFaceEmbeddings,
+            LCMistralEmbeddings,
             OpenAIEmbeddings,
             TeiEndpointEmbeddings,
+            VoyageAIEmbeddings,
         )

         self._vendors = [
@@ -70,7 +72,9 @@ class EmbeddingManager:
             LCCohereEmbeddings,
             LCHuggingFaceEmbeddings,
             LCGoogleEmbeddings,
+            LCMistralEmbeddings,
             TeiEndpointEmbeddings,
+            VoyageAIEmbeddings,
         ]

     def __getitem__(self, key: str) -> BaseEmbeddings:

@ -40,8 +40,8 @@ chat_input_focus_js_with_submit = """
|
||||||
function() {
|
function() {
|
||||||
let chatInput = document.querySelector("#chat-input textarea");
|
let chatInput = document.querySelector("#chat-input textarea");
|
||||||
let chatInputSubmit = document.querySelector("#chat-input button.submit-button");
|
let chatInputSubmit = document.querySelector("#chat-input button.submit-button");
|
||||||
chatInputSubmit.click();
|
|
||||||
chatInput.focus();
|
chatInput.focus();
|
||||||
|
chatInputSubmit.click();
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@@ -1059,15 +1059,18 @@ class FileIndexPage(BasePage):
         """Handle zip files"""
         zip_files = [file for file in files if file.endswith(".zip")]
         remaining_files = [file for file in files if not file.endswith("zip")]
+        errors: list[str] = []

         # Clean-up <zip_dir> before unzip to remove old files
         shutil.rmtree(zip_dir, ignore_errors=True)

+        # Unzip
         for zip_file in zip_files:
             # Prepare new zip output dir, separated for each files
             basename = os.path.splitext(os.path.basename(zip_file))[0]
             zip_out_dir = os.path.join(zip_dir, basename)
             os.makedirs(zip_out_dir, exist_ok=True)

             with zipfile.ZipFile(zip_file, "r") as zip_ref:
                 zip_ref.extractall(zip_out_dir)

@@ -1084,7 +1087,7 @@ class FileIndexPage(BasePage):
         if n_zip_file > 0:
             print(f"Update zip files: {n_zip_file}")

-        return remaining_files
+        return remaining_files, errors

     def index_fn(
         self, files, urls, reindex: bool, settings, user_id
@@ -1100,16 +1103,18 @@ class FileIndexPage(BasePage):
         """
         if urls:
             files = [it.strip() for it in urls.split("\n")]
-            errors = []
+            errors = self.validate_urls(files)
         else:
             if not files:
                 gr.Info("No uploaded file")
                 yield "", ""
                 return
+            files, unzip_errors = self._may_extract_zip(
+                files, flowsettings.KH_ZIP_INPUT_DIR
+            )
+            errors = self.validate_files(files)
+            errors.extend(unzip_errors)

-        files = self._may_extract_zip(files, flowsettings.KH_ZIP_INPUT_DIR)
-
-        errors = self.validate(files)
-
         if errors:
             gr.Warning(", ".join(errors))
             yield "", ""
@@ -1569,7 +1574,7 @@ class FileIndexPage(BasePage):
             selected_item["files"],
         )

-    def validate(self, files: list[str]):
+    def validate_files(self, files: list[str]):
         """Validate if the files are valid"""
         paths = [Path(file) for file in files]
         errors = []
@@ -1598,6 +1603,14 @@ class FileIndexPage(BasePage):

         return errors

+    def validate_urls(self, urls: list[str]):
+        """Validate if the urls are valid"""
+        errors = []
+        for url in urls:
+            if not url.startswith("http") and not url.startswith("https"):
+                errors.append(f"Invalid url `{url}`")
+        return errors
+

 class FileSelector(BasePage):
     """File selector UI in the Chat page"""
@@ -1618,8 +1631,8 @@ class FileSelector(BasePage):
         self.mode = gr.Radio(
             value=default_mode,
             choices=[
-                ("Search All", "all"),
-                ("Search In File(s)", "select"),
+                (" جستجو همگانی ", "all"),
+                (" جستجو در فایل ها ", "select"),
             ],
             container=False,
         )

@@ -48,12 +48,12 @@ class App(BaseApp):
             from ktem.pages.login import LoginPage

             with gr.Tab(
-                "Welcome", elem_id="login-tab", id="login-tab"
+                "خوش آمدید", elem_id="login-tab", id="login-tab"
             ) as self._tabs["login-tab"]:
                 self.login_page = LoginPage(self)

         with gr.Tab(
-            "Chat",
+            "گفتگو",
             elem_id="chat-tab",
             id="chat-tab",
             visible=not self.f_user_management,
@@ -77,7 +77,7 @@ class App(BaseApp):
                 setattr(self, f"_index_{index.id}", page)
         elif len(self.index_manager.indices) > 1:
             with gr.Tab(
-                "Files",
+                "فایل ها",
                 elem_id="indices-tab",
                 elem_classes=["fill-main-area-height", "scrollable", "indices-tab"],
                 id="indices-tab",
@@ -94,7 +94,7 @@ class App(BaseApp):
         if not KH_DEMO_MODE:
             if not KH_SSO_ENABLED:
                 with gr.Tab(
-                    "Resources",
+                    "منابع",
                     elem_id="resources-tab",
                     id="resources-tab",
                     visible=not self.f_user_management,
@@ -103,7 +103,7 @@ class App(BaseApp):
                     self.resources_page = ResourcesTab(self)

             with gr.Tab(
-                "Settings",
+                "تنظیمات",
                 elem_id="settings-tab",
                 id="settings-tab",
                 visible=not self.f_user_management,
@@ -112,7 +112,7 @@ class App(BaseApp):
                 self.settings_page = SettingsPage(self)

             with gr.Tab(
-                "Help",
+                "راهنما",
                 elem_id="help-tab",
                 id="help-tab",
                 visible=not self.f_user_management,

@@ -272,7 +272,7 @@ class ChatPage(BasePage):

                 if len(self._app.index_manager.indices) > 0:
                     quick_upload_label = (
-                        "Quick Upload" if not KH_DEMO_MODE else "Or input new paper URL"
+                        "بارگذاری" if not KH_DEMO_MODE else "Or input new paper URL"
                     )

                     with gr.Accordion(label=quick_upload_label) as _:
@@ -287,9 +287,9 @@ class ChatPage(BasePage):
                         )
                         self.quick_urls = gr.Textbox(
                             placeholder=(
-                                "Or paste URLs"
+                                "یا آدرس وب جایگذاری کنید"
                                 if not KH_DEMO_MODE
-                                else "Paste Arxiv URLs\n(https://arxiv.org/abs/xxx)"
+                                else "آدرس وب جایگذاری کنید\n(https://arxiv.org/abs/xxx)"
                             ),
                             lines=1,
                             container=False,
@@ -314,17 +314,17 @@ class ChatPage(BasePage):
                 self.chat_panel = ChatPanel(self._app)

                 with gr.Accordion(
-                    label="Chat settings",
+                    label="تنظیمات گفتگو",
                     elem_id="chat-settings-expand",
                     open=False,
                     visible=not KH_DEMO_MODE,
                 ) as self.chat_settings:
                     with gr.Row(elem_id="quick-setting-labels"):
-                        gr.HTML("Reasoning method")
+                        gr.HTML("روش استدلال")
                         gr.HTML(
-                            "Model", visible=not KH_DEMO_MODE and not KH_SSO_ENABLED
+                            "مدل", visible=not KH_DEMO_MODE and not KH_SSO_ENABLED
                         )
-                        gr.HTML("Language")
+                        gr.HTML("زبان")

                     with gr.Row():
                         reasoning_setting = (
@@ -372,7 +372,7 @@ class ChatPage(BasePage):
                 if not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool):
                     self.use_mindmap = gr.State(value=True)
                     self.use_mindmap_check = gr.Checkbox(
-                        label="Mindmap (on)",
+                        label="نقشه ذهنی روشن",
                         container=False,
                         elem_id="use-mindmap-checkbox",
                         value=True,
@@ -380,7 +380,7 @@ class ChatPage(BasePage):
                 else:
                     self.use_mindmap = gr.State(value=False)
                     self.use_mindmap_check = gr.Checkbox(
-                        label="Mindmap (off)",
+                        label="نقشه ذهنی خاموش",
                         container=False,
                         elem_id="use-mindmap-checkbox",
                         value=False,
@@ -390,7 +390,7 @@ class ChatPage(BasePage):
                 scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel"
             ) as self.info_column:
                 with gr.Accordion(
-                    label="Information panel", open=True, elem_id="info-expand"
+                    label="پنل اطلاعات", open=True, elem_id="info-expand"
                 ):
                     self.modal = gr.HTML("<div id='pdf-modal'></div>")
                     self.plot_panel = gr.Plot(visible=False)

@ -6,15 +6,15 @@ KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
|
||||||
|
|
||||||
if not KH_DEMO_MODE:
|
if not KH_DEMO_MODE:
|
||||||
PLACEHOLDER_TEXT = (
|
PLACEHOLDER_TEXT = (
|
||||||
"This is the beginning of a new conversation.\n"
|
".این شروع یک گفتگوی جدید است\n"
|
||||||
"Start by uploading a file or a web URL. "
|
".با بارگذاری یک فایل یا یک آدرس وب شروع کنید\n "
|
||||||
"Visit Files tab for more options (e.g: GraphRAG)."
|
".برای گزینه های بیشتر به برگه فایل ها مراجعه کنید "
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
PLACEHOLDER_TEXT = (
|
PLACEHOLDER_TEXT = (
|
||||||
"Welcome to Kotaemon Demo. "
|
".به دموی دیتال چت خوش آمدید \n"
|
||||||
"Start by browsing preloaded conversations to get onboard.\n"
|
".برای شروع، مکالمات قبلی بارگذاری شده را مرور کنید\n"
|
||||||
"Check out Hint section for more tips."
|
".برای نکات بیشتر به بخش راهنمایی مراجعه کنید"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ class ChatPanel(BasePage):
|
||||||
label=self._app.app_name,
|
label=self._app.app_name,
|
||||||
placeholder=PLACEHOLDER_TEXT,
|
placeholder=PLACEHOLDER_TEXT,
|
||||||
show_label=False,
|
show_label=False,
|
||||||
|
rtl = True,
|
||||||
elem_id="main-chat-bot",
|
elem_id="main-chat-bot",
|
||||||
show_copy_button=True,
|
show_copy_button=True,
|
||||||
likeable=True,
|
likeable=True,
|
||||||
|
@ -37,9 +38,10 @@ class ChatPanel(BasePage):
|
||||||
self.text_input = gr.MultimodalTextbox(
|
self.text_input = gr.MultimodalTextbox(
|
||||||
interactive=True,
|
interactive=True,
|
||||||
scale=20,
|
scale=20,
|
||||||
|
rtl=True,
|
||||||
file_count="multiple",
|
file_count="multiple",
|
||||||
placeholder=(
|
placeholder=(
|
||||||
"Type a message, search the @web, or tag a file with @filename"
|
"یک پیام بنویسید"
|
||||||
),
|
),
|
||||||
container=False,
|
container=False,
|
||||||
show_label=False,
|
show_label=False,
|
||||||
|
|
|
@@ -51,8 +51,8 @@ class ConversationControl(BasePage):

     def on_building_ui(self):
         with gr.Row():
-            title_text = "Conversations" if not KH_DEMO_MODE else "Kotaemon Papers"
-            gr.Markdown("## {}".format(title_text))
+            title_text = "گفتگو ها" if not KH_DEMO_MODE else "Kotaemon Papers"
+            gr.Markdown(f'<div dir="rtl"> {title_text}</div>')
             self.btn_toggle_dark_mode = gr.Button(
                 value="",
                 icon=f"{ASSETS_DIR}/dark_mode.svg",
@@ -66,7 +66,7 @@ class ConversationControl(BasePage):
                 icon=f"{ASSETS_DIR}/expand.svg",
                 scale=1,
                 size="sm",
-                elem_classes=["no-background", "body-text-color"],
+                elem_classes=["no-background", "body-text-color", "top-left-button"],
                 elem_id="chat-expand-button",
             )
             self.btn_info_expand = gr.Button(
@@ -75,7 +75,7 @@ class ConversationControl(BasePage):
                 min_width=2,
                 scale=1,
                 size="sm",
-                elem_classes=["no-background", "body-text-color"],
+                elem_classes=["no-background", "body-text-color", "top-left-button"],
                 elem_id="info-expand-button",
             )

@@ -102,7 +102,7 @@ class ConversationControl(BasePage):
         with gr.Row() as self._new_delete:
             self.cb_suggest_chat = gr.Checkbox(
                 value=False,
-                label="Suggest chat",
+                label=" پیشنهاد گفتگو ",
                 min_width=10,
                 scale=6,
                 elem_id="suggest-chat-checkbox",
@@ -111,7 +111,7 @@ class ConversationControl(BasePage):
             )
             self.cb_is_public = gr.Checkbox(
                 value=False,
-                label="Share this conversation",
+                label="این گفتگو را ارسال کن",
                 elem_id="is-public-checkbox",
                 container=False,
                 visible=not KH_DEMO_MODE and not KH_SSO_ENABLED,

@@ -12,34 +12,31 @@ class ReportIssue(BasePage):
         self.on_building_ui()

     def on_building_ui(self):
-        with gr.Accordion(label="Feedback", open=False, elem_id="report-accordion"):
+        with gr.Accordion(label="بازخورد", open=False, elem_id="report-accordion"):
             self.correctness = gr.Radio(
                 choices=[
-                    ("The answer is correct", "correct"),
-                    ("The answer is incorrect", "incorrect"),
+                    (" پاسخ صحیح است ", "correct"),
+                    (" پاسخ اشتباه است ", "incorrect"),
                 ],
-                label="Correctness:",
+                label="صحت سنجی:",
             )
             self.issues = gr.CheckboxGroup(
                 choices=[
-                    ("The answer is offensive", "offensive"),
-                    ("The evidence is incorrect", "wrong-evidence"),
+                    (" پاسخ نامحترمانه است ", "offensive"),
+                    (" مدارک اشتباه است ", "wrong-evidence"),
                 ],
-                label="Other issue:",
+                label="دیگر مشکلات:",
             )
             self.more_detail = gr.Textbox(
                 placeholder=(
-                    "More detail (e.g. how wrong is it, what is the "
-                    "correct answer, etc...)"
+                    "جزئیات بیشتر (مثلا چقدر اشتباه است، پاسخ صحیح چیست، و غیره...)"
                 ),
                 container=False,
                 lines=3,
             )
-            gr.Markdown(
-                "This will send the current chat and the user settings to "
-                "help with investigation"
-            )
-            self.report_btn = gr.Button("Report")
+            alert_text = "این عمل، چت فعلی و تنظیمات کاربر را برای کمک به تحقیق ارسال خواهد کرد"
+            gr.Markdown(f'<div dir="rtl"> {alert_text}</div>')
+            self.report_btn = gr.Button("گزارش")

     def report(
         self,
@@ -83,4 +80,4 @@ class ReportIssue(BasePage):
         )
         session.add(issue)
         session.commit()
-        gr.Info("Thank you for your feedback")
+        gr.Info("از بازخورد شما متشکریم")

@@ -52,9 +52,13 @@ class RerankingManager:
                 self._default = item.name

     def load_vendors(self):
-        from kotaemon.rerankings import CohereReranking, TeiFastReranking
+        from kotaemon.rerankings import (
+            CohereReranking,
+            TeiFastReranking,
+            VoyageAIReranking,
+        )

-        self._vendors = [TeiFastReranking, CohereReranking]
+        self._vendors = [TeiFastReranking, CohereReranking, VoyageAIReranking]

     def __getitem__(self, key: str) -> BaseReranking:
         """Get model by name"""

@@ -1,5 +1,6 @@
 SUPPORTED_LANGUAGE_MAP = {
     "en": "English",
+    "fa": "Persian",
     "ja": "Japanese",
     "vi": "Vietnamese",
     "es": "Spanish",