Compare commits: 6f4acc979c ... d8309edefb

10 commits:

| SHA1 |
| --- |
| d8309edefb |
| 17864a439a |
| 37cdc28ceb |
| ec1f6abdc4 |
| ffe766f24d |
| 833982ac81 |
| ddb5187293 |
| 5132288386 |
| c33bedca9e |
| 9b05693e4f |

@@ -1,3 +1,4 @@
# this is an example .env file, use it to create your own .env file and place it in the root of the project

# settings for OpenAI

@@ -16,6 +17,12 @@ AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002
# settings for Cohere
COHERE_API_KEY=<COHERE_API_KEY>

+# settings for Mistral
+# MISTRAL_API_KEY=placeholder
+
+# settings for VoyageAI
+VOYAGE_API_KEY=<VOYAGE_API_KEY>
+
# settings for local models
LOCAL_MODEL=qwen2.5:7b
LOCAL_MODEL_EMBEDDINGS=nomic-embed-text

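These keys are read at startup with python-decouple (see the flowsettings hunks just below), so an unset key falls back to its default and the corresponding provider simply stays unregistered. A minimal sketch, with placeholder values:

```python
# a minimal sketch of how the new .env keys are consumed via python-decouple
from decouple import config

VOYAGE_API_KEY = config("VOYAGE_API_KEY", default="")  # empty -> VoyageAI not registered
LOCAL_MODEL = config("LOCAL_MODEL", default="")        # e.g. "qwen2.5:7b" served by Ollama

if VOYAGE_API_KEY:
    print("VoyageAI embeddings and reranking will be registered")
```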
@@ -172,6 +172,25 @@ if OPENAI_API_KEY:
        "default": IS_OPENAI_DEFAULT,
    }

+VOYAGE_API_KEY = config("VOYAGE_API_KEY", default="")
+if VOYAGE_API_KEY:
+    KH_EMBEDDINGS["voyageai"] = {
+        "spec": {
+            "__type__": "kotaemon.embeddings.VoyageAIEmbeddings",
+            "api_key": VOYAGE_API_KEY,
+            "model": config("VOYAGE_EMBEDDINGS_MODEL", default="voyage-3-large"),
+        },
+        "default": False,
+    }
+    KH_RERANKINGS["voyageai"] = {
+        "spec": {
+            "__type__": "kotaemon.rerankings.VoyageAIReranking",
+            "model_name": "rerank-2",
+            "api_key": VOYAGE_API_KEY,
+        },
+        "default": False,
+    }
+
if config("LOCAL_MODEL", default=""):
    KH_LLMS["ollama"] = {
        "spec": {

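Each registry entry is a plain dict whose `__type__` names the class to instantiate lazily. A minimal sketch of how such a spec can be resolved with `import_dotted_string` (the same theflow helper the openai.py hunk below imports); the actual resolution path inside kotaemon may differ:

```python
# a minimal sketch, assuming specs resolve roughly like this; the key is a placeholder
from theflow.utils.modules import import_dotted_string

spec = {
    "__type__": "kotaemon.embeddings.VoyageAIEmbeddings",
    "api_key": "pa-...",
    "model": "voyage-3-large",
}
kwargs = {k: v for k, v in spec.items() if k != "__type__"}
cls = import_dotted_string(spec["__type__"], safe=False)
embeddings = cls(**kwargs)
```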
@@ -243,6 +262,15 @@ KH_LLMS["cohere"] = {
    },
    "default": False,
}
+KH_LLMS["mistral"] = {
+    "spec": {
+        "__type__": "kotaemon.llms.ChatOpenAI",
+        "base_url": "https://api.mistral.ai/v1",
+        "model": "ministral-8b-latest",
+        "api_key": config("MISTRAL_API_KEY", default="your-key"),
+    },
+    "default": False,
+}

# additional embeddings configurations
KH_EMBEDDINGS["cohere"] = {

@@ -262,6 +290,14 @@ KH_EMBEDDINGS["google"] = {
    },
    "default": not IS_OPENAI_DEFAULT,
}
+KH_EMBEDDINGS["mistral"] = {
+    "spec": {
+        "__type__": "kotaemon.embeddings.LCMistralEmbeddings",
+        "model": "mistral-embed",
+        "api_key": config("MISTRAL_API_KEY", default="your-key"),
+    },
+    "default": False,
+}
# KH_EMBEDDINGS["huggingface"] = {
#     "spec": {
#         "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings",

@@ -343,7 +379,7 @@ GRAPHRAG_INDICES = [
        "config": {
            "supported_file_types": (
                ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
-                ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
+                ".pptx, .csv, .html, .mhtml, .txt, .md, .zip, .json"
            ),
            "private": True,
        },

@@ -8,6 +8,7 @@ from .schema import (
    HumanMessage,
    LLMInterface,
    RetrievedDocument,
+    StructuredOutputLLMInterface,
    SystemMessage,
)

@@ -21,6 +22,7 @@ __all__ = [
    "HumanMessage",
    "RetrievedDocument",
    "LLMInterface",
+    "StructuredOutputLLMInterface",
    "ExtractorOutput",
    "Param",
    "Node",

@@ -143,6 +143,11 @@ class LLMInterface(AIMessage):
    logprobs: list[float] = []


+class StructuredOutputLLMInterface(LLMInterface):
+    parsed: Any
+    refusal: str = ""
+
+
class ExtractorOutput(Document):
    """
    Represents the output of an extractor.

@@ -6,10 +6,12 @@ from .langchain_based import (
    LCCohereEmbeddings,
    LCGoogleEmbeddings,
    LCHuggingFaceEmbeddings,
+    LCMistralEmbeddings,
    LCOpenAIEmbeddings,
)
from .openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
from .tei_endpoint_embed import TeiEndpointEmbeddings
+from .voyageai import VoyageAIEmbeddings

__all__ = [
    "BaseEmbeddings",

@@ -20,7 +22,9 @@ __all__ = [
    "LCCohereEmbeddings",
    "LCHuggingFaceEmbeddings",
    "LCGoogleEmbeddings",
+    "LCMistralEmbeddings",
    "OpenAIEmbeddings",
    "AzureOpenAIEmbeddings",
    "FastEmbedEmbeddings",
+    "VoyageAIEmbeddings",
]

@@ -254,3 +254,40 @@ class LCGoogleEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
            raise ImportError("Please install langchain-google-genai")

        return GoogleGenerativeAIEmbeddings
+
+
+class LCMistralEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
+    """Wrapper around LangChain's MistralAI embedding, focusing on key parameters"""
+
+    api_key: str = Param(
+        help="API key (https://console.mistral.ai/api-keys)",
+        default=None,
+        required=True,
+    )
+    model: str = Param(
+        help="Model name to use ('mistral-embed')",
+        default="mistral-embed",
+        required=True,
+    )
+
+    def __init__(
+        self,
+        model: str = "mistral-embed",
+        api_key: Optional[str] = None,
+        **params,
+    ):
+        super().__init__(
+            model=model,
+            api_key=api_key,
+            **params,
+        )
+
+    def _get_lc_class(self):
+        try:
+            from langchain_mistralai import MistralAIEmbeddings
+        except ImportError:
+            raise ImportError(
+                "Please install langchain_mistralai: "
+                "`pip install -U langchain_mistralai`"
+            )
+        return MistralAIEmbeddings

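A minimal usage sketch for the new wrapper (the API key is a placeholder; the `model(text)` call convention matches the test suite further down):

```python
# a minimal sketch, assuming a valid Mistral API key
from kotaemon.embeddings import LCMistralEmbeddings

model = LCMistralEmbeddings(model="mistral-embed", api_key="sk-...")
docs = model("Hello, world!")  # -> list of DocumentWithEmbedding
print(len(docs[0].embedding))  # mistral-embed returns 1024-dimensional vectors
```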
libs/kotaemon/kotaemon/embeddings/voyageai.py (new file, 66 lines)

@@ -0,0 +1,66 @@
+"""Implements embeddings from [Voyage AI](https://voyageai.com).
+"""
+
+import importlib
+
+from kotaemon.base import Document, DocumentWithEmbedding, Param
+
+from .base import BaseEmbeddings
+
+vo = None
+
+
+def _import_voyageai():
+    global vo
+    if not vo:
+        vo = importlib.import_module("voyageai")
+    return vo
+
+
+def _format_output(texts: list[str], embeddings: list[list]):
+    """Formats the output of all `.embed` calls.
+
+    Args:
+        texts: List of original documents
+        embeddings: Embeddings corresponding to each document
+    """
+    return [
+        DocumentWithEmbedding(content=text, embedding=embedding)
+        for text, embedding in zip(texts, embeddings)
+    ]
+
+
+class VoyageAIEmbeddings(BaseEmbeddings):
+    """Voyage AI provides best-in-class embedding models and rerankers."""
+
+    api_key: str = Param(None, help="Voyage API key", required=False)
+    model: str = Param(
+        "voyage-3",
+        help=(
+            "Model name to use. The Voyage "
+            "[documentation](https://docs.voyageai.com/docs/embeddings) "
+            "provides a list of all available embedding models."
+        ),
+        required=True,
+    )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        if not self.api_key:
+            raise ValueError("API key must be provided for VoyageAIEmbeddings.")
+
+        self._client = _import_voyageai().Client(api_key=self.api_key)
+        self._aclient = _import_voyageai().AsyncClient(api_key=self.api_key)
+
+    def invoke(
+        self, text: str | list[str] | Document | list[Document], *args, **kwargs
+    ) -> list[DocumentWithEmbedding]:
+        texts = [t.content for t in self.prepare_input(text)]
+        embeddings = self._client.embed(texts, model=self.model).embeddings
+        return _format_output(texts, embeddings)
+
+    async def ainvoke(
+        self, text: str | list[str] | Document | list[Document], *args, **kwargs
+    ) -> list[DocumentWithEmbedding]:
+        texts = [t.content for t in self.prepare_input(text)]
+        # parenthesize the await so `.embeddings` reads the awaited response,
+        # not the coroutine (the original line awaited the attribute access)
+        embeddings = (await self._aclient.embed(texts, model=self.model)).embeddings
+        return _format_output(texts, embeddings)

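Usage mirrors the other embedding classes; a minimal sketch with a placeholder key:

```python
# a minimal sketch; requires `pip install voyageai` and a real key
from kotaemon.embeddings import VoyageAIEmbeddings

model = VoyageAIEmbeddings(api_key="pa-...", model="voyage-3")
docs = model("Hello, world!")
print(len(docs[0].embedding))  # voyage-3 returns 1024-dimensional vectors
```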
@@ -168,7 +168,7 @@ class VectorRetrieval(BaseRetrieval):
        if self.retrieval_mode == "vector":
            emb = self.embedding(text)[0].embedding
            _, scores, ids = self.vector_store.query(
-                embedding=emb, top_k=top_k_first_round, **kwargs
+                embedding=emb, top_k=top_k_first_round, doc_ids=scope, **kwargs
            )
            docs = self.doc_store.get(ids)
            result = [

@@ -197,7 +197,7 @@ class VectorRetrieval(BaseRetrieval):

        assert self.doc_store is not None
        _, vs_scores, vs_ids = self.vector_store.query(
-            embedding=emb, top_k=top_k_first_round, **kwargs
+            embedding=emb, top_k=top_k_first_round, doc_ids=scope, **kwargs
        )
        if vs_ids:
            vs_docs = self.doc_store.get(vs_ids)

@@ -14,6 +14,7 @@ from .chats import (
    LCGeminiChat,
    LCOllamaChat,
    LlamaCppChat,
+    StructuredOutputChatOpenAI,
)
from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
from .cot import ManualSequentialChainOfThought, Thought

@@ -31,6 +32,7 @@ __all__ = [
    "SystemMessage",
    "AzureChatOpenAI",
    "ChatOpenAI",
+    "StructuredOutputChatOpenAI",
    "LCAnthropicChat",
    "LCGeminiChat",
    "LCCohereChat",

@@ -10,7 +10,7 @@ from .langchain_based import (
    LCOllamaChat,
)
from .llamacpp import LlamaCppChat
-from .openai import AzureChatOpenAI, ChatOpenAI
+from .openai import AzureChatOpenAI, ChatOpenAI, StructuredOutputChatOpenAI

__all__ = [
    "ChatOpenAI",

@@ -18,6 +18,7 @@ __all__ = [
    "ChatLLM",
    "EndpointChatLLM",
    "ChatOpenAI",
+    "StructuredOutputChatOpenAI",
    "LCAnthropicChat",
    "LCGeminiChat",
    "LCCohereChat",

@@ -1,8 +1,16 @@
-from typing import TYPE_CHECKING, AsyncGenerator, Iterator, Optional
+from typing import TYPE_CHECKING, AsyncGenerator, Iterator, Optional, Type

+from pydantic import BaseModel
from theflow.utils.modules import import_dotted_string

-from kotaemon.base import AIMessage, BaseMessage, HumanMessage, LLMInterface, Param
+from kotaemon.base import (
+    AIMessage,
+    BaseMessage,
+    HumanMessage,
+    LLMInterface,
+    Param,
+    StructuredOutputLLMInterface,
+)

from .base import ChatLLM

@@ -330,6 +338,88 @@ class ChatOpenAI(BaseChatOpenAI):
        return await client.chat.completions.create(**params)


+class StructuredOutputChatOpenAI(ChatOpenAI):
+    """OpenAI chat model that returns structured output"""
+
+    response_schema: Type[BaseModel] = Param(
+        help="class that subclasses pydantic's BaseModel", required=True
+    )
+
+    def prepare_output(self, resp: dict) -> StructuredOutputLLMInterface:
+        """Convert the OpenAI response into StructuredOutputLLMInterface"""
+        additional_kwargs = {}
+
+        if "tool_calls" in resp["choices"][0]["message"]:
+            additional_kwargs["tool_calls"] = resp["choices"][0]["message"][
+                "tool_calls"
+            ]
+
+        if resp["choices"][0].get("logprobs") is None:
+            logprobs = []
+        else:
+            all_logprobs = resp["choices"][0]["logprobs"].get("content")
+            logprobs = (
+                [logprob["logprob"] for logprob in all_logprobs] if all_logprobs else []
+            )
+
+        output = StructuredOutputLLMInterface(
+            parsed=resp["choices"][0]["message"]["parsed"],
+            candidates=[(_["message"]["content"] or "") for _ in resp["choices"]],
+            content=resp["choices"][0]["message"]["content"] or "",
+            total_tokens=resp["usage"]["total_tokens"],
+            prompt_tokens=resp["usage"]["prompt_tokens"],
+            completion_tokens=resp["usage"]["completion_tokens"],
+            messages=[
+                AIMessage(content=(_["message"]["content"]) or "")
+                for _ in resp["choices"]
+            ],
+            additional_kwargs=additional_kwargs,
+            logprobs=logprobs,
+        )
+
+        return output
+
+    def prepare_params(self, **kwargs):
+        if "tools_pydantic" in kwargs:
+            kwargs.pop("tools_pydantic")
+
+        params_ = {
+            "model": self.model,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+            "n": self.n,
+            "stop": self.stop,
+            "frequency_penalty": self.frequency_penalty,
+            "presence_penalty": self.presence_penalty,
+            "tool_choice": self.tool_choice,
+            "tools": self.tools,
+            "logprobs": self.logprobs,
+            "logit_bias": self.logit_bias,
+            "top_logprobs": self.top_logprobs,
+            "top_p": self.top_p,
+            "response_format": self.response_schema,
+        }
+        params = {k: v for k, v in params_.items() if v is not None}
+        params.update(kwargs)
+
+        # structured output does not stream
+        params.pop("stream")
+
+        return params
+
+    def openai_response(self, client, **kwargs):
+        """Get the openai response"""
+        params = self.prepare_params(**kwargs)
+
+        return client.beta.chat.completions.parse(**params)
+
+    async def aopenai_response(self, client, **kwargs):
+        """Get the openai response"""
+        params = self.prepare_params(**kwargs)
+
+        return await client.beta.chat.completions.parse(**params)
+
+
class AzureChatOpenAI(BaseChatOpenAI):
    """OpenAI chat model provided by Microsoft Azure"""

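A minimal usage sketch: `response_schema` takes a pydantic model, which is forwarded to `client.beta.chat.completions.parse` as `response_format`, and the parsed object comes back on `StructuredOutputLLMInterface.parsed`. The schema and key below are illustrative:

```python
# a minimal sketch, assuming an OpenAI API key; the schema is hypothetical
from pydantic import BaseModel

from kotaemon.llms import StructuredOutputChatOpenAI


class Answer(BaseModel):
    title: str
    bullet_points: list[str]


llm = StructuredOutputChatOpenAI(
    api_key="sk-...",
    model="gpt-4o-mini",
    response_schema=Answer,
)
output = llm("Summarize RAG in three bullet points")
print(output.parsed.title, output.parsed.bullet_points)
```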
@@ -1,5 +1,6 @@
from .base import BaseReranking
from .cohere import CohereReranking
from .tei_fast_rerank import TeiFastReranking
+from .voyageai import VoyageAIReranking

-__all__ = ["BaseReranking", "TeiFastReranking", "CohereReranking"]
+__all__ = ["BaseReranking", "TeiFastReranking", "CohereReranking", "VoyageAIReranking"]

@@ -1,5 +1,7 @@
from __future__ import annotations

+import os
+
from decouple import config

from kotaemon.base import Document, Param

@@ -23,6 +25,11 @@ class CohereReranking(BaseReranking):
        help="Cohere API key",
        required=True,
    )
+    base_url: str = Param(
+        None,
+        help="Rerank API base url. Default is https://api.cohere.com",
+        required=False,
+    )

    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Use Cohere Reranker model to re-order documents

@@ -38,7 +45,9 @@ class CohereReranking(BaseReranking):
            print("Cohere API key not found. Skipping rerankings.")
            return documents

-        cohere_client = cohere.Client(self.cohere_api_key)
+        cohere_client = cohere.Client(
+            self.cohere_api_key, base_url=self.base_url or os.getenv("CO_API_URL")
+        )
        compressed_docs: list[Document] = []

        if not documents:  # to avoid empty api call

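The `base_url` override also respects `CO_API_URL`, the Cohere SDK's own environment variable, so self-hosted or proxied rerank endpoints work without code changes. A minimal sketch (the proxy URL is hypothetical):

```python
# a minimal sketch; key and proxy URL are placeholders
from kotaemon.rerankings import CohereReranking

reranker = CohereReranking(
    cohere_api_key="co-...",
    base_url="https://cohere-proxy.example.com",  # or leave None to fall back to CO_API_URL
)
```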
libs/kotaemon/kotaemon/rerankings/voyageai.py (new file, 63 lines)

@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+import importlib
+
+from decouple import config
+
+from kotaemon.base import Document, Param
+
+from .base import BaseReranking
+
+vo = None
+
+
+def _import_voyageai():
+    global vo
+    if not vo:
+        vo = importlib.import_module("voyageai")
+    return vo
+
+
+class VoyageAIReranking(BaseReranking):
+    """VoyageAI Reranking model"""
+
+    model_name: str = Param(
+        "rerank-2",
+        help=(
+            "ID of the model to use. You can go to [Supported Models]"
+            "(https://docs.voyageai.com/docs/reranker) to see the supported models"
+        ),
+        required=True,
+    )
+    api_key: str = Param(
+        config("VOYAGE_API_KEY", ""),
+        help="VoyageAI API key",
+        required=True,
+    )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        if not self.api_key:
+            raise ValueError("API key must be provided for VoyageAIReranking.")
+
+        self._client = _import_voyageai().Client(api_key=self.api_key)
+        self._aclient = _import_voyageai().AsyncClient(api_key=self.api_key)
+
+    def run(self, documents: list[Document], query: str) -> list[Document]:
+        """Use VoyageAI Reranker model to re-order documents
+        with their relevance score"""
+        compressed_docs: list[Document] = []
+
+        if not documents:  # to avoid empty api call
+            return compressed_docs
+
+        _docs = [d.content for d in documents]
+        response = self._client.rerank(
+            model=self.model_name, query=query, documents=_docs
+        )
+        for r in response.results:
+            doc = documents[r.index]
+            doc.metadata["reranking_score"] = r.relevance_score
+            compressed_docs.append(doc)
+
+        return compressed_docs

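A minimal usage sketch (key and documents are placeholders); `run` annotates each returned document with a `reranking_score` and orders them by relevance:

```python
# a minimal sketch; requires `pip install voyageai` and a real key
from kotaemon.base import Document
from kotaemon.rerankings import VoyageAIReranking

reranker = VoyageAIReranking(api_key="pa-...", model_name="rerank-2")
docs = [Document(content="kotaemon is a RAG-based UI"), Document(content="unrelated text")]
ranked = reranker.run(docs, query="What is kotaemon?")
print(ranked[0].metadata["reranking_score"])
```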
@@ -113,14 +113,18 @@ class LanceDBDocumentStore(BaseDocumentStore):
            )
        except (ValueError, FileNotFoundError):
            docs = []
-        return [
-            Document(
+
+        # return the documents using the order of original
+        # ids (which were ordered by score)
+        doc_dict = {
+            doc["id"]: Document(
                id_=doc["id"],
                text=doc["text"] if doc["text"] else "<empty>",
                metadata=json.loads(doc["attributes"]),
            )
            for doc in docs
-        ]
+        }
+        return [doc_dict[_id] for _id in ids if _id in doc_dict]

    def delete(self, ids: Union[List[str], str], refresh_indices: bool = True):
        """Delete document by id"""

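The behavioral change above is order preservation: building a dict keyed by id and then iterating over the caller's `ids` returns documents in the requested (score-sorted) order rather than the backend's scan order, silently dropping ids that were not found. A pure-Python illustration:

```python
# a minimal illustration of the reordering idiom (not the LanceDB API)
ids = ["c", "a"]                      # caller's score-sorted order
rows = [{"id": "a"}, {"id": "c"}]     # rows as the backend happened to return them
doc_dict = {row["id"]: row for row in rows}
ordered = [doc_dict[_id] for _id in ids if _id in doc_dict]
assert [row["id"] for row in ordered] == ["c", "a"]
```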
@@ -36,6 +36,7 @@ dependencies = [
    "langchain-google-genai>=1.0.3,<2.0.0",
    "langchain-anthropic",
    "langchain-ollama",
+    "langchain-mistralai",
    "langchain-cohere>=0.2.4,<0.3.0",
    "llama-hub>=0.0.79,<0.1.0",
    "llama-index>=0.10.40,<0.11.0",

@@ -89,6 +90,7 @@ adv = [
    "tabulate",
    "unstructured>=0.15.8,<0.16",
    "wikipedia>=1.4.0,<1.5",
+    "voyageai>=0.3.0",
]
dev = [
    "black",

@@ -70,6 +70,15 @@ def if_llama_cpp_not_installed():
        return False


+def if_voyageai_not_installed():
+    try:
+        import voyageai  # noqa: F401
+    except ImportError:
+        return True
+    else:
+        return False
+
+
skip_when_haystack_not_installed = pytest.mark.skipif(
    if_haystack_not_installed(), reason="Haystack is not installed"
)

@@ -97,3 +106,7 @@ skip_openai_lc_wrapper_test = pytest.mark.skipif(
skip_llama_cpp_not_installed = pytest.mark.skipif(
    if_llama_cpp_not_installed(), reason="llama_cpp is not installed"
)
+
+skip_when_voyageai_not_installed = pytest.mark.skipif(
+    if_voyageai_not_installed(), reason="voyageai is not installed"
+)

@@ -1,22 +1,24 @@
import json
from pathlib import Path
-from unittest.mock import patch
+from unittest.mock import Mock, patch

from openai.types.create_embedding_response import CreateEmbeddingResponse

-from kotaemon.base import Document
+from kotaemon.base import Document, DocumentWithEmbedding
from kotaemon.embeddings import (
    AzureOpenAIEmbeddings,
    FastEmbedEmbeddings,
    LCCohereEmbeddings,
    LCHuggingFaceEmbeddings,
    OpenAIEmbeddings,
+    VoyageAIEmbeddings,
)

from .conftest import (
    skip_when_cohere_not_installed,
    skip_when_fastembed_not_installed,
    skip_when_sentence_bert_not_installed,
+    skip_when_voyageai_not_installed,
)

with open(Path(__file__).parent / "resources" / "embedding_openai_batch.json") as f:

@@ -155,3 +157,16 @@ def test_fastembed_embeddings():
    model = FastEmbedEmbeddings()
    output = model("Hello World")
    assert_embedding_result(output)
+
+
+voyage_output_mock = Mock()
+voyage_output_mock.embeddings = [[1.0, 2.1, 3.2]]
+
+
+@skip_when_voyageai_not_installed
+@patch("voyageai.Client.embed", return_value=voyage_output_mock)
+@patch("voyageai.AsyncClient.embed", return_value=voyage_output_mock)
+def test_voyageai_embeddings(sync_call, async_call):
+    model = VoyageAIEmbeddings(api_key="test")
+    output = model("Hello, world!")
+    assert all(isinstance(doc, DocumentWithEmbedding) for doc in output)

@@ -39,7 +39,7 @@ class BaseApp:

    def __init__(self):
        self.dev_mode = getattr(settings, "KH_MODE", "") == "dev"
-        self.app_name = getattr(settings, "KH_APP_NAME", "Kotaemon")
+        self.app_name = getattr(settings, "KH_APP_NAME", "DatallChat")
        self.app_version = getattr(settings, "KH_APP_VERSION", "")
        self.f_user_management = getattr(settings, "KH_FEATURE_USER_MANAGEMENT", False)
        self._theme = KotaemonTheme()

@@ -1,7 +1,10 @@
:root {
    --main-area-height: calc(100vh - 110px);
+    direction: rtl;
}


/* no footer */
footer {
    display: none !important;

@@ -27,6 +30,11 @@ footer {
    height: 100% !important; */
}

+input[type="radio"] {
+    margin-left: 5px;
+}
+
.gradio-container
/* styling for header bar */
.header-bar {
    background-color: transparent;

@@ -168,6 +176,27 @@ mark {
    color: var(--body-text-color);
}

+#chat-input textarea {
+    direction: rtl;
+}
+#chat-input button.submit-button {
+    margin-left: 3px;
+    margin-right: 3px;
+    transform: scaleX(-1);
+    -moz-transform: scaleX(-1);
+    -webkit-transform: scaleX(-1);
+}
+
+.secondary-wrap {
+    position: relative;
+}
+
+.secondary-wrap .icon-wrap {
+    /* direction: ltr; */
+    position: absolute;
+    right: 90%;
+}
+
/* for setting right-aligned buttons */
.right-button {
    min-width: 200px !important;

@@ -195,13 +224,13 @@ mark {
#toggle-dark-button {
    position: fixed;
    top: 6px;
-    right: 30px;
+    left: 30px;
}

#info-expand-button {
    position: absolute;
    top: 6px;
-    right: 15px;
+    left: 30px;
}

/* prevent overflow of html info panel */

@@ -212,7 +241,7 @@ mark {
#chat-expand-button {
    position: absolute;
    top: 6px;
-    right: -10px;
+    left: -10px;
    z-index: 1;
}

@@ -231,14 +260,14 @@ mark {
    position: absolute;
    width: 110px;
    top: 10px;
-    right: 25px;
+    left: 15px;
}

#citation-dropdown {
    width: min(25%, 100px);
    position: absolute;
    top: 2px;
-    left: 120px;
+    right: 120px;
    height: 35px;
}

@@ -377,9 +406,16 @@ pdfjs-viewer-element {

/* Bot animation */

.message.bot {
    animation: fadein 1.0s ease-in-out forwards;
}

+
+/* .message.bot button {
+    text-align: right;
+    background-color: blue;
+    direction: rtl !important;
+} */
+
details.evidence {
    animation: fadein 0.3s ease-in-out forwards;

@@ -21,7 +21,7 @@ function run() {

    // setup conversation dropdown placeholder
    let conv_dropdown = document.querySelector("#conversation-dropdown input");
-    conv_dropdown.placeholder = "Browse conversation";
+    conv_dropdown.placeholder = "مرور گفتگو";

    // move info-expand-button
    let info_expand_button = document.getElementById("info-expand-button");

@@ -59,8 +59,10 @@ class EmbeddingManager:
            LCCohereEmbeddings,
            LCGoogleEmbeddings,
            LCHuggingFaceEmbeddings,
+            LCMistralEmbeddings,
            OpenAIEmbeddings,
            TeiEndpointEmbeddings,
+            VoyageAIEmbeddings,
        )

        self._vendors = [

@@ -70,7 +72,9 @@ class EmbeddingManager:
            LCCohereEmbeddings,
            LCHuggingFaceEmbeddings,
            LCGoogleEmbeddings,
+            LCMistralEmbeddings,
            TeiEndpointEmbeddings,
+            VoyageAIEmbeddings,
        ]

    def __getitem__(self, key: str) -> BaseEmbeddings:

@@ -40,8 +40,8 @@ chat_input_focus_js_with_submit = """
function() {
    let chatInput = document.querySelector("#chat-input textarea");
    let chatInputSubmit = document.querySelector("#chat-input button.submit-button");
-    chatInputSubmit.click();
    chatInput.focus();
+    chatInputSubmit.click();
}
"""

@@ -1059,15 +1059,18 @@ class FileIndexPage(BasePage):
        """Handle zip files"""
        zip_files = [file for file in files if file.endswith(".zip")]
        remaining_files = [file for file in files if not file.endswith("zip")]
+        errors: list[str] = []

        # Clean-up <zip_dir> before unzip to remove old files
        shutil.rmtree(zip_dir, ignore_errors=True)

        # Unzip
        for zip_file in zip_files:
            # Prepare new zip output dir, separated for each files
            basename = os.path.splitext(os.path.basename(zip_file))[0]
            zip_out_dir = os.path.join(zip_dir, basename)
            os.makedirs(zip_out_dir, exist_ok=True)

            with zipfile.ZipFile(zip_file, "r") as zip_ref:
                zip_ref.extractall(zip_out_dir)

@@ -1084,7 +1087,7 @@ class FileIndexPage(BasePage):
        if n_zip_file > 0:
            print(f"Update zip files: {n_zip_file}")

-        return remaining_files
+        return remaining_files, errors

    def index_fn(
        self, files, urls, reindex: bool, settings, user_id

@@ -1100,16 +1103,18 @@ class FileIndexPage(BasePage):
        """
        if urls:
            files = [it.strip() for it in urls.split("\n")]
-            errors = []
+            errors = self.validate_urls(files)
        else:
            if not files:
                gr.Info("No uploaded file")
                yield "", ""
                return
+            files, unzip_errors = self._may_extract_zip(
+                files, flowsettings.KH_ZIP_INPUT_DIR
+            )
+            errors = self.validate_files(files)
+            errors.extend(unzip_errors)

-            files = self._may_extract_zip(files, flowsettings.KH_ZIP_INPUT_DIR)
-
-            errors = self.validate(files)
        if errors:
            gr.Warning(", ".join(errors))
            yield "", ""

@@ -1569,7 +1574,7 @@ class FileIndexPage(BasePage):
            selected_item["files"],
        )

-    def validate(self, files: list[str]):
+    def validate_files(self, files: list[str]):
        """Validate if the files are valid"""
        paths = [Path(file) for file in files]
        errors = []

@@ -1598,6 +1603,14 @@ class FileIndexPage(BasePage):

        return errors

+    def validate_urls(self, urls: list[str]):
+        """Validate if the urls are valid"""
+        errors = []
+        for url in urls:
+            if not url.startswith("http") and not url.startswith("https"):
+                errors.append(f"Invalid url `{url}`")
+        return errors
+

class FileSelector(BasePage):
    """File selector UI in the Chat page"""

@@ -1618,8 +1631,8 @@ class FileSelector(BasePage):
        self.mode = gr.Radio(
            value=default_mode,
            choices=[
-                ("Search All", "all"),
-                ("Search In File(s)", "select"),
+                (" جستجو همگانی ", "all"),
+                (" جستجو در فایل ها ", "select"),
            ],
            container=False,
        )

@ -48,12 +48,12 @@ class App(BaseApp):
|
|||
from ktem.pages.login import LoginPage
|
||||
|
||||
with gr.Tab(
|
||||
"Welcome", elem_id="login-tab", id="login-tab"
|
||||
"خوش آمدید", elem_id="login-tab", id="login-tab"
|
||||
) as self._tabs["login-tab"]:
|
||||
self.login_page = LoginPage(self)
|
||||
|
||||
with gr.Tab(
|
||||
"Chat",
|
||||
"گفتگو",
|
||||
elem_id="chat-tab",
|
||||
id="chat-tab",
|
||||
visible=not self.f_user_management,
|
||||
|
@@ -77,7 +77,7 @@ class App(BaseApp):
                setattr(self, f"_index_{index.id}", page)
        elif len(self.index_manager.indices) > 1:
            with gr.Tab(
-                "Files",
+                "فایل ها",
                elem_id="indices-tab",
                elem_classes=["fill-main-area-height", "scrollable", "indices-tab"],
                id="indices-tab",

@@ -94,7 +94,7 @@ class App(BaseApp):
        if not KH_DEMO_MODE:
            if not KH_SSO_ENABLED:
                with gr.Tab(
-                    "Resources",
+                    "منابع",
                    elem_id="resources-tab",
                    id="resources-tab",
                    visible=not self.f_user_management,

@@ -103,7 +103,7 @@ class App(BaseApp):
                    self.resources_page = ResourcesTab(self)

            with gr.Tab(
-                "Settings",
+                "تنظیمات",
                elem_id="settings-tab",
                id="settings-tab",
                visible=not self.f_user_management,

@@ -112,7 +112,7 @@ class App(BaseApp):
                self.settings_page = SettingsPage(self)

            with gr.Tab(
-                "Help",
+                "راهنما",
                elem_id="help-tab",
                id="help-tab",
                visible=not self.f_user_management,

@@ -272,7 +272,7 @@ class ChatPage(BasePage):

            if len(self._app.index_manager.indices) > 0:
                quick_upload_label = (
-                    "Quick Upload" if not KH_DEMO_MODE else "Or input new paper URL"
+                    "بارگذاری" if not KH_DEMO_MODE else "Or input new paper URL"
                )

                with gr.Accordion(label=quick_upload_label) as _:

@@ -287,9 +287,9 @@ class ChatPage(BasePage):
                    )
                    self.quick_urls = gr.Textbox(
                        placeholder=(
-                            "Or paste URLs"
+                            "یا آدرس وب جایگذاری کنید"
                            if not KH_DEMO_MODE
-                            else "Paste Arxiv URLs\n(https://arxiv.org/abs/xxx)"
+                            else "آدرس وب جایگذاری کنید\n(https://arxiv.org/abs/xxx)"
                        ),
                        lines=1,
                        container=False,

@@ -314,17 +314,17 @@ class ChatPage(BasePage):
                self.chat_panel = ChatPanel(self._app)

                with gr.Accordion(
-                    label="Chat settings",
+                    label="تنظیمات گفتگو",
                    elem_id="chat-settings-expand",
                    open=False,
                    visible=not KH_DEMO_MODE,
                ) as self.chat_settings:
                    with gr.Row(elem_id="quick-setting-labels"):
-                        gr.HTML("Reasoning method")
+                        gr.HTML("روش استدلال")
                        gr.HTML(
-                            "Model", visible=not KH_DEMO_MODE and not KH_SSO_ENABLED
+                            "مدل", visible=not KH_DEMO_MODE and not KH_SSO_ENABLED
                        )
-                        gr.HTML("Language")
+                        gr.HTML("زبان")

                    with gr.Row():
                        reasoning_setting = (

@@ -372,7 +372,7 @@ class ChatPage(BasePage):
                    if not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool):
                        self.use_mindmap = gr.State(value=True)
                        self.use_mindmap_check = gr.Checkbox(
-                            label="Mindmap (on)",
+                            label="نقشه ذهنی روشن",
                            container=False,
                            elem_id="use-mindmap-checkbox",
                            value=True,

@@ -380,7 +380,7 @@ class ChatPage(BasePage):
                    else:
                        self.use_mindmap = gr.State(value=False)
                        self.use_mindmap_check = gr.Checkbox(
-                            label="Mindmap (off)",
+                            label="نقشه ذهنی خاموش",
                            container=False,
                            elem_id="use-mindmap-checkbox",
                            value=False,

@@ -390,7 +390,7 @@ class ChatPage(BasePage):
                scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel"
            ) as self.info_column:
                with gr.Accordion(
-                    label="Information panel", open=True, elem_id="info-expand"
+                    label="پنل اطلاعات", open=True, elem_id="info-expand"
                ):
                    self.modal = gr.HTML("<div id='pdf-modal'></div>")
                    self.plot_panel = gr.Plot(visible=False)

@@ -6,15 +6,15 @@ KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)

if not KH_DEMO_MODE:
    PLACEHOLDER_TEXT = (
-        "This is the beginning of a new conversation.\n"
-        "Start by uploading a file or a web URL. "
-        "Visit Files tab for more options (e.g: GraphRAG)."
+        ".این شروع یک گفتگوی جدید است\n"
+        ".با بارگذاری یک فایل یا یک آدرس وب شروع کنید\n "
+        ".برای گزینه های بیشتر به برگه فایل ها مراجعه کنید "
    )
else:
    PLACEHOLDER_TEXT = (
-        "Welcome to Kotaemon Demo. "
-        "Start by browsing preloaded conversations to get onboard.\n"
-        "Check out Hint section for more tips."
+        ".به دموی دیتال چت خوش آمدید \n"
+        ".برای شروع، مکالمات قبلی بارگذاری شده را مرور کنید\n"
+        ".برای نکات بیشتر به بخش راهنمایی مراجعه کنید"
    )

@@ -28,6 +28,7 @@ class ChatPanel(BasePage):
            label=self._app.app_name,
            placeholder=PLACEHOLDER_TEXT,
            show_label=False,
+            rtl=True,
            elem_id="main-chat-bot",
            show_copy_button=True,
            likeable=True,

@@ -37,9 +38,10 @@ class ChatPanel(BasePage):
        self.text_input = gr.MultimodalTextbox(
            interactive=True,
            scale=20,
+            rtl=True,
            file_count="multiple",
            placeholder=(
-                "Type a message, search the @web, or tag a file with @filename"
+                "یک پیام بنویسید"
            ),
            container=False,
            show_label=False,

@@ -51,8 +51,8 @@ class ConversationControl(BasePage):

    def on_building_ui(self):
        with gr.Row():
-            title_text = "Conversations" if not KH_DEMO_MODE else "Kotaemon Papers"
-            gr.Markdown("## {}".format(title_text))
+            title_text = "گفتگو ها" if not KH_DEMO_MODE else "Kotaemon Papers"
+            gr.Markdown(f'<div dir="rtl"> {title_text}</div>')
            self.btn_toggle_dark_mode = gr.Button(
                value="",
                icon=f"{ASSETS_DIR}/dark_mode.svg",

@@ -66,7 +66,7 @@ class ConversationControl(BasePage):
                icon=f"{ASSETS_DIR}/expand.svg",
                scale=1,
                size="sm",
-                elem_classes=["no-background", "body-text-color"],
+                elem_classes=["no-background", "body-text-color", "top-left-button"],
                elem_id="chat-expand-button",
            )
            self.btn_info_expand = gr.Button(

@@ -75,7 +75,7 @@ class ConversationControl(BasePage):
                min_width=2,
                scale=1,
                size="sm",
-                elem_classes=["no-background", "body-text-color"],
+                elem_classes=["no-background", "body-text-color", "top-left-button"],
                elem_id="info-expand-button",
            )

@@ -102,7 +102,7 @@ class ConversationControl(BasePage):
        with gr.Row() as self._new_delete:
            self.cb_suggest_chat = gr.Checkbox(
                value=False,
-                label="Suggest chat",
+                label=" پیشنهاد گفتگو ",
                min_width=10,
                scale=6,
                elem_id="suggest-chat-checkbox",

@@ -111,7 +111,7 @@ class ConversationControl(BasePage):
            )
            self.cb_is_public = gr.Checkbox(
                value=False,
-                label="Share this conversation",
+                label="این گفتگو را ارسال کن",
                elem_id="is-public-checkbox",
                container=False,
                visible=not KH_DEMO_MODE and not KH_SSO_ENABLED,

@@ -12,34 +12,31 @@ class ReportIssue(BasePage):
        self.on_building_ui()

    def on_building_ui(self):
-        with gr.Accordion(label="Feedback", open=False, elem_id="report-accordion"):
+        with gr.Accordion(label="بازخورد", open=False, elem_id="report-accordion"):
            self.correctness = gr.Radio(
                choices=[
-                    ("The answer is correct", "correct"),
-                    ("The answer is incorrect", "incorrect"),
+                    (" پاسخ صحیح است ", "correct"),
+                    (" پاسخ اشتباه است ", "incorrect"),
                ],
-                label="Correctness:",
+                label="صحت سنجی:",
            )
            self.issues = gr.CheckboxGroup(
                choices=[
-                    ("The answer is offensive", "offensive"),
-                    ("The evidence is incorrect", "wrong-evidence"),
+                    (" پاسخ نامحترمانه است ", "offensive"),
+                    (" مدارک اشتباه است ", "wrong-evidence"),
                ],
-                label="Other issue:",
+                label="دیگر مشکلات:",
            )
            self.more_detail = gr.Textbox(
                placeholder=(
-                    "More detail (e.g. how wrong is it, what is the "
-                    "correct answer, etc...)"
+                    "جزئیات بیشتر (مثلا چقدر اشتباه است، پاسخ صحیح چیست، و غیره...)"
                ),
                container=False,
                lines=3,
            )
-            gr.Markdown(
-                "This will send the current chat and the user settings to "
-                "help with investigation"
-            )
-            self.report_btn = gr.Button("Report")
+            alert_text = "این عمل، چت فعلی و تنظیمات کاربر را برای کمک به تحقیق ارسال خواهد کرد"
+            gr.Markdown(f'<div dir="rtl"> {alert_text}</div>')
+            self.report_btn = gr.Button("گزارش")

    def report(
        self,

@@ -83,4 +80,4 @@ class ReportIssue(BasePage):
        )
        session.add(issue)
        session.commit()
-        gr.Info("Thank you for your feedback")
+        gr.Info("از بازخورد شما متشکریم")

@@ -52,9 +52,13 @@ class RerankingManager:
            self._default = item.name

    def load_vendors(self):
-        from kotaemon.rerankings import CohereReranking, TeiFastReranking
+        from kotaemon.rerankings import (
+            CohereReranking,
+            TeiFastReranking,
+            VoyageAIReranking,
+        )

-        self._vendors = [TeiFastReranking, CohereReranking]
+        self._vendors = [TeiFastReranking, CohereReranking, VoyageAIReranking]

    def __getitem__(self, key: str) -> BaseReranking:
        """Get model by name"""

@@ -1,5 +1,6 @@
SUPPORTED_LANGUAGE_MAP = {
    "en": "English",
+    "fa": "Persian",
    "ja": "Japanese",
    "vi": "Vietnamese",
    "es": "Spanish",