Compare commits
10 Commits: 6f4acc979c...d8309edefb

d8309edefb
17864a439a
37cdc28ceb
ec1f6abdc4
ffe766f24d
833982ac81
ddb5187293
5132288386
c33bedca9e
9b05693e4f
@@ -1,3 +1,4 @@
 # this is an example .env file, use it to create your own .env file and place it in the root of the project
+
 # settings for OpenAI
@@ -16,6 +17,12 @@ AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002
 # settings for Cohere
 COHERE_API_KEY=<COHERE_API_KEY>

+# settings for Mistral
+# MISTRAL_API_KEY=placeholder
+
+# settings for VoyageAI
+VOYAGE_API_KEY=<VOYAGE_API_KEY>
+
 # settings for local models
 LOCAL_MODEL=qwen2.5:7b
 LOCAL_MODEL_EMBEDDINGS=nomic-embed-text

@@ -172,6 +172,25 @@ if OPENAI_API_KEY:
         "default": IS_OPENAI_DEFAULT,
     }

+VOYAGE_API_KEY = config("VOYAGE_API_KEY", default="")
+if VOYAGE_API_KEY:
+    KH_EMBEDDINGS["voyageai"] = {
+        "spec": {
+            "__type__": "kotaemon.embeddings.VoyageAIEmbeddings",
+            "api_key": VOYAGE_API_KEY,
+            "model": config("VOYAGE_EMBEDDINGS_MODEL", default="voyage-3-large"),
+        },
+        "default": False,
+    }
+    KH_RERANKINGS["voyageai"] = {
+        "spec": {
+            "__type__": "kotaemon.rerankings.VoyageAIReranking",
+            "model_name": "rerank-2",
+            "api_key": VOYAGE_API_KEY,
+        },
+        "default": False,
+    }
+
 if config("LOCAL_MODEL", default=""):
     KH_LLMS["ollama"] = {
         "spec": {
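
Each entry registered above is a declarative spec: `__type__` names the class to instantiate, and the remaining keys become constructor arguments. A minimal sketch of how such a spec can be resolved (illustrative only, not the actual ktem loader; the key value is a placeholder):

    import importlib

    spec = {
        "__type__": "kotaemon.embeddings.VoyageAIEmbeddings",
        "api_key": "<VOYAGE_API_KEY>",
        "model": "voyage-3-large",
    }
    # split the dotted path into module and class name, then import it
    module_name, _, class_name = spec["__type__"].rpartition(".")
    cls = getattr(importlib.import_module(module_name), class_name)
    embedding = cls(**{k: v for k, v in spec.items() if k != "__type__"})
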
@ -243,6 +262,15 @@ KH_LLMS["cohere"] = {
|
||||||
},
|
},
|
||||||
"default": False,
|
"default": False,
|
||||||
}
|
}
|
||||||
|
KH_LLMS["mistral"] = {
|
||||||
|
"spec": {
|
||||||
|
"__type__": "kotaemon.llms.ChatOpenAI",
|
||||||
|
"base_url": "https://api.mistral.ai/v1",
|
||||||
|
"model": "ministral-8b-latest",
|
||||||
|
"api_key": config("MISTRAL_API_KEY", default="your-key"),
|
||||||
|
},
|
||||||
|
"default": False,
|
||||||
|
}
|
||||||
|
|
||||||
# additional embeddings configurations
|
# additional embeddings configurations
|
||||||
KH_EMBEDDINGS["cohere"] = {
|
KH_EMBEDDINGS["cohere"] = {
|
||||||
|
@ -262,6 +290,14 @@ KH_EMBEDDINGS["google"] = {
|
||||||
},
|
},
|
||||||
"default": not IS_OPENAI_DEFAULT,
|
"default": not IS_OPENAI_DEFAULT,
|
||||||
}
|
}
|
||||||
|
KH_EMBEDDINGS["mistral"] = {
|
||||||
|
"spec": {
|
||||||
|
"__type__": "kotaemon.embeddings.LCMistralEmbeddings",
|
||||||
|
"model": "mistral-embed",
|
||||||
|
"api_key": config("MISTRAL_API_KEY", default="your-key"),
|
||||||
|
},
|
||||||
|
"default": False,
|
||||||
|
}
|
||||||
# KH_EMBEDDINGS["huggingface"] = {
|
# KH_EMBEDDINGS["huggingface"] = {
|
||||||
# "spec": {
|
# "spec": {
|
||||||
# "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings",
|
# "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings",
|
||||||
|
@@ -343,7 +379,7 @@ GRAPHRAG_INDICES = [
         "config": {
             "supported_file_types": (
                 ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
-                ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
+                ".pptx, .csv, .html, .mhtml, .txt, .md, .zip, .json"
             ),
             "private": True,
         },

@@ -8,6 +8,7 @@ from .schema import (
     HumanMessage,
     LLMInterface,
     RetrievedDocument,
+    StructuredOutputLLMInterface,
     SystemMessage,
 )

@@ -21,6 +22,7 @@ __all__ = [
     "HumanMessage",
     "RetrievedDocument",
     "LLMInterface",
+    "StructuredOutputLLMInterface",
     "ExtractorOutput",
     "Param",
     "Node",

@@ -143,6 +143,11 @@ class LLMInterface(AIMessage):
     logprobs: list[float] = []


+class StructuredOutputLLMInterface(LLMInterface):
+    parsed: Any
+    refusal: str = ""
+
+
 class ExtractorOutput(Document):
     """
     Represents the output of an extractor.

@@ -6,10 +6,12 @@ from .langchain_based import (
     LCCohereEmbeddings,
     LCGoogleEmbeddings,
     LCHuggingFaceEmbeddings,
+    LCMistralEmbeddings,
     LCOpenAIEmbeddings,
 )
 from .openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
 from .tei_endpoint_embed import TeiEndpointEmbeddings
+from .voyageai import VoyageAIEmbeddings

 __all__ = [
     "BaseEmbeddings",
@@ -20,7 +22,9 @@ __all__ = [
     "LCCohereEmbeddings",
     "LCHuggingFaceEmbeddings",
     "LCGoogleEmbeddings",
+    "LCMistralEmbeddings",
     "OpenAIEmbeddings",
     "AzureOpenAIEmbeddings",
     "FastEmbedEmbeddings",
+    "VoyageAIEmbeddings",
 ]

@@ -254,3 +254,40 @@ class LCGoogleEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
             raise ImportError("Please install langchain-google-genai")

         return GoogleGenerativeAIEmbeddings
+
+
+class LCMistralEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
+    """Wrapper around LangChain's MistralAI embedding, focusing on key parameters"""
+
+    api_key: str = Param(
+        help="API key (https://console.mistral.ai/api-keys)",
+        default=None,
+        required=True,
+    )
+    model: str = Param(
+        help="Model name to use ('mistral-embed')",
+        default="mistral-embed",
+        required=True,
+    )
+
+    def __init__(
+        self,
+        model: str = "mistral-embed",
+        api_key: Optional[str] = None,
+        **params,
+    ):
+        super().__init__(
+            model=model,
+            api_key=api_key,
+            **params,
+        )
+
+    def _get_lc_class(self):
+        try:
+            from langchain_mistralai import MistralAIEmbeddings
+        except ImportError:
+            raise ImportError(
+                "Please install langchain_mistralai: "
+                "`pip install -U langchain_mistralai`"
+            )
+        return MistralAIEmbeddings
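
A minimal usage sketch for the new wrapper (assumes `langchain_mistralai` is installed; the key is a placeholder; the call style follows the embedding tests later in this diff):

    from kotaemon.embeddings import LCMistralEmbeddings

    embedding = LCMistralEmbeddings(model="mistral-embed", api_key="<MISTRAL_API_KEY>")
    docs = embedding("Hello World")  # returns embedded document(s)
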
libs/kotaemon/kotaemon/embeddings/voyageai.py (new file, 66 lines)
@@ -0,0 +1,66 @@
+"""Implements embeddings from [Voyage AI](https://voyageai.com).
+"""
+
+import importlib
+
+from kotaemon.base import Document, DocumentWithEmbedding, Param
+
+from .base import BaseEmbeddings
+
+vo = None
+
+
+def _import_voyageai():
+    global vo
+    if not vo:
+        vo = importlib.import_module("voyageai")
+    return vo
+
+
+def _format_output(texts: list[str], embeddings: list[list]):
+    """Formats the output of all `.embed` calls.
+
+    Args:
+        texts: List of original documents
+        embeddings: Embeddings corresponding to each document
+    """
+    return [
+        DocumentWithEmbedding(content=text, embedding=embedding)
+        for text, embedding in zip(texts, embeddings)
+    ]
+
+
+class VoyageAIEmbeddings(BaseEmbeddings):
+    """Voyage AI provides best-in-class embedding models and rerankers."""
+
+    api_key: str = Param(None, help="Voyage API key", required=False)
+    model: str = Param(
+        "voyage-3",
+        help=(
+            "Model name to use. The Voyage "
+            "[documentation](https://docs.voyageai.com/docs/embeddings) "
+            "provides a list of all available embedding models."
+        ),
+        required=True,
+    )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        if not self.api_key:
+            raise ValueError("API key must be provided for VoyageAIEmbeddings.")
+
+        self._client = _import_voyageai().Client(api_key=self.api_key)
+        self._aclient = _import_voyageai().AsyncClient(api_key=self.api_key)
+
+    def invoke(
+        self, text: str | list[str] | Document | list[Document], *args, **kwargs
+    ) -> list[DocumentWithEmbedding]:
+        texts = [t.content for t in self.prepare_input(text)]
+        embeddings = self._client.embed(texts, model=self.model).embeddings
+        return _format_output(texts, embeddings)
+
+    async def ainvoke(
+        self, text: str | list[str] | Document | list[Document], *args, **kwargs
+    ) -> list[DocumentWithEmbedding]:
+        texts = [t.content for t in self.prepare_input(text)]
+        # parentheses needed: await the coroutine before reading .embeddings
+        embeddings = (await self._aclient.embed(texts, model=self.model)).embeddings
+        return _format_output(texts, embeddings)
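
A minimal usage sketch for the new class (assumes the `voyageai` package is installed; key and texts are placeholders):

    from kotaemon.embeddings import VoyageAIEmbeddings

    embedding = VoyageAIEmbeddings(api_key="<VOYAGE_API_KEY>", model="voyage-3")
    docs = embedding(["first document", "second document"])
    print(len(docs), len(docs[0].embedding))
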
@@ -168,7 +168,7 @@ class VectorRetrieval(BaseRetrieval):
         if self.retrieval_mode == "vector":
             emb = self.embedding(text)[0].embedding
             _, scores, ids = self.vector_store.query(
-                embedding=emb, top_k=top_k_first_round, **kwargs
+                embedding=emb, top_k=top_k_first_round, doc_ids=scope, **kwargs
             )
             docs = self.doc_store.get(ids)
             result = [
@@ -197,7 +197,7 @@ class VectorRetrieval(BaseRetrieval):

             assert self.doc_store is not None
             _, vs_scores, vs_ids = self.vector_store.query(
-                embedding=emb, top_k=top_k_first_round, **kwargs
+                embedding=emb, top_k=top_k_first_round, doc_ids=scope, **kwargs
             )
             if vs_ids:
                 vs_docs = self.doc_store.get(vs_ids)

@@ -14,6 +14,7 @@ from .chats import (
     LCGeminiChat,
     LCOllamaChat,
     LlamaCppChat,
+    StructuredOutputChatOpenAI,
 )
 from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
 from .cot import ManualSequentialChainOfThought, Thought
@@ -31,6 +32,7 @@ __all__ = [
     "SystemMessage",
     "AzureChatOpenAI",
     "ChatOpenAI",
+    "StructuredOutputChatOpenAI",
     "LCAnthropicChat",
     "LCGeminiChat",
     "LCCohereChat",

@@ -10,7 +10,7 @@ from .langchain_based import (
     LCOllamaChat,
 )
 from .llamacpp import LlamaCppChat
-from .openai import AzureChatOpenAI, ChatOpenAI
+from .openai import AzureChatOpenAI, ChatOpenAI, StructuredOutputChatOpenAI

 __all__ = [
     "ChatOpenAI",
@@ -18,6 +18,7 @@ __all__ = [
     "ChatLLM",
     "EndpointChatLLM",
     "ChatOpenAI",
+    "StructuredOutputChatOpenAI",
     "LCAnthropicChat",
     "LCGeminiChat",
     "LCCohereChat",

@@ -1,8 +1,16 @@
-from typing import TYPE_CHECKING, AsyncGenerator, Iterator, Optional
+from typing import TYPE_CHECKING, AsyncGenerator, Iterator, Optional, Type

+from pydantic import BaseModel
 from theflow.utils.modules import import_dotted_string

-from kotaemon.base import AIMessage, BaseMessage, HumanMessage, LLMInterface, Param
+from kotaemon.base import (
+    AIMessage,
+    BaseMessage,
+    HumanMessage,
+    LLMInterface,
+    Param,
+    StructuredOutputLLMInterface,
+)

 from .base import ChatLLM

@@ -330,6 +338,88 @@ class ChatOpenAI(BaseChatOpenAI):
         return await client.chat.completions.create(**params)


+class StructuredOutputChatOpenAI(ChatOpenAI):
+    """OpenAI chat model that returns structured output"""
+
+    response_schema: Type[BaseModel] = Param(
+        help="class that subclasses pydantic's BaseModel", required=True
+    )
+
+    def prepare_output(self, resp: dict) -> StructuredOutputLLMInterface:
+        """Convert the OpenAI response into StructuredOutputLLMInterface"""
+        additional_kwargs = {}
+
+        if "tool_calls" in resp["choices"][0]["message"]:
+            additional_kwargs["tool_calls"] = resp["choices"][0]["message"][
+                "tool_calls"
+            ]
+
+        if resp["choices"][0].get("logprobs") is None:
+            logprobs = []
+        else:
+            all_logprobs = resp["choices"][0]["logprobs"].get("content")
+            logprobs = (
+                [logprob["logprob"] for logprob in all_logprobs] if all_logprobs else []
+            )
+
+        output = StructuredOutputLLMInterface(
+            parsed=resp["choices"][0]["message"]["parsed"],
+            candidates=[(_["message"]["content"] or "") for _ in resp["choices"]],
+            content=resp["choices"][0]["message"]["content"] or "",
+            total_tokens=resp["usage"]["total_tokens"],
+            prompt_tokens=resp["usage"]["prompt_tokens"],
+            completion_tokens=resp["usage"]["completion_tokens"],
+            messages=[
+                AIMessage(content=(_["message"]["content"]) or "")
+                for _ in resp["choices"]
+            ],
+            additional_kwargs=additional_kwargs,
+            logprobs=logprobs,
+        )
+
+        return output
+
+    def prepare_params(self, **kwargs):
+        if "tools_pydantic" in kwargs:
+            kwargs.pop("tools_pydantic")
+
+        params_ = {
+            "model": self.model,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+            "n": self.n,
+            "stop": self.stop,
+            "frequency_penalty": self.frequency_penalty,
+            "presence_penalty": self.presence_penalty,
+            "tool_choice": self.tool_choice,
+            "tools": self.tools,
+            "logprobs": self.logprobs,
+            "logit_bias": self.logit_bias,
+            "top_logprobs": self.top_logprobs,
+            "top_p": self.top_p,
+            "response_format": self.response_schema,
+        }
+        params = {k: v for k, v in params_.items() if v is not None}
+        params.update(kwargs)
+
+        # doesn't do streaming
+        params.pop("stream")
+
+        return params
+
+    def openai_response(self, client, **kwargs):
+        """Get the openai response"""
+        params = self.prepare_params(**kwargs)
+
+        return client.beta.chat.completions.parse(**params)
+
+    async def aopenai_response(self, client, **kwargs):
+        """Get the openai response"""
+        params = self.prepare_params(**kwargs)
+
+        return await client.beta.chat.completions.parse(**params)
+
+
 class AzureChatOpenAI(BaseChatOpenAI):
     """OpenAI chat model provided by Microsoft Azure"""
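
A minimal usage sketch for the structured-output model (the schema, model name, and key below are illustrative; `response_schema` and the `parsed` field come from the class above):

    from pydantic import BaseModel

    from kotaemon.llms import StructuredOutputChatOpenAI


    class Citation(BaseModel):
        title: str
        year: int


    llm = StructuredOutputChatOpenAI(
        api_key="<OPENAI_API_KEY>",
        model="gpt-4o-mini",
        response_schema=Citation,
    )
    output = llm("Extract the citation: 'Attention Is All You Need, 2017'")
    print(output.parsed)  # a Citation instance parsed by the OpenAI SDK
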
@@ -1,5 +1,6 @@
 from .base import BaseReranking
 from .cohere import CohereReranking
 from .tei_fast_rerank import TeiFastReranking
+from .voyageai import VoyageAIReranking

-__all__ = ["BaseReranking", "TeiFastReranking", "CohereReranking"]
+__all__ = ["BaseReranking", "TeiFastReranking", "CohereReranking", "VoyageAIReranking"]

@@ -1,5 +1,7 @@
 from __future__ import annotations

+import os
+
 from decouple import config

 from kotaemon.base import Document, Param
@@ -23,6 +25,11 @@ class CohereReranking(BaseReranking):
         help="Cohere API key",
         required=True,
     )
+    base_url: str = Param(
+        None,
+        help="Rerank API base url. Default is https://api.cohere.com",
+        required=False,
+    )

     def run(self, documents: list[Document], query: str) -> list[Document]:
         """Use Cohere Reranker model to re-order documents
@@ -38,7 +45,9 @@ class CohereReranking(BaseReranking):
             print("Cohere API key not found. Skipping rerankings.")
             return documents

-        cohere_client = cohere.Client(self.cohere_api_key)
+        cohere_client = cohere.Client(
+            self.cohere_api_key, base_url=self.base_url or os.getenv("CO_API_URL")
+        )
         compressed_docs: list[Document] = []

         if not documents:  # to avoid empty api call
libs/kotaemon/kotaemon/rerankings/voyageai.py (new file, 63 lines)
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+import importlib
+
+from decouple import config
+
+from kotaemon.base import Document, Param
+
+from .base import BaseReranking
+
+vo = None
+
+
+def _import_voyageai():
+    global vo
+    if not vo:
+        vo = importlib.import_module("voyageai")
+    return vo
+
+
+class VoyageAIReranking(BaseReranking):
+    """VoyageAI Reranking model"""
+
+    model_name: str = Param(
+        "rerank-2",
+        help=(
+            "ID of the model to use. You can go to [Supported Models]"
+            "(https://docs.voyageai.com/docs/reranker) to see the supported models"
+        ),
+        required=True,
+    )
+    api_key: str = Param(
+        config("VOYAGE_API_KEY", ""),
+        help="VoyageAI API key",
+        required=True,
+    )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        if not self.api_key:
+            raise ValueError("API key must be provided for VoyageAIReranking.")
+
+        self._client = _import_voyageai().Client(api_key=self.api_key)
+        self._aclient = _import_voyageai().AsyncClient(api_key=self.api_key)
+
+    def run(self, documents: list[Document], query: str) -> list[Document]:
+        """Use VoyageAI Reranker model to re-order documents
+        with their relevance score"""
+        compressed_docs: list[Document] = []
+
+        if not documents:  # to avoid empty api call
+            return compressed_docs
+
+        _docs = [d.content for d in documents]
+        response = self._client.rerank(
+            model=self.model_name, query=query, documents=_docs
+        )
+        for r in response.results:
+            doc = documents[r.index]
+            doc.metadata["reranking_score"] = r.relevance_score
+            compressed_docs.append(doc)
+
+        return compressed_docs
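
A minimal usage sketch (assumes `voyageai` is installed; key and documents are placeholders):

    from kotaemon.base import Document
    from kotaemon.rerankings import VoyageAIReranking

    reranker = VoyageAIReranking(model_name="rerank-2", api_key="<VOYAGE_API_KEY>")
    ranked = reranker.run(
        documents=[Document(content="voyage rerankers"), Document(content="unrelated")],
        query="which reranker does Voyage provide?",
    )
    for doc in ranked:
        print(doc.metadata["reranking_score"], doc.content)
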
@@ -113,14 +113,18 @@ class LanceDBDocumentStore(BaseDocumentStore):
             )
         except (ValueError, FileNotFoundError):
             docs = []
-        return [
-            Document(
+        # return the documents using the order of original
+        # ids (which were ordered by score)
+        doc_dict = {
+            doc["id"]: Document(
                 id_=doc["id"],
                 text=doc["text"] if doc["text"] else "<empty>",
                 metadata=json.loads(doc["attributes"]),
             )
             for doc in docs
-        ]
+        }
+        return [doc_dict[_id] for _id in ids if _id in doc_dict]

     def delete(self, ids: Union[List[str], str], refresh_indices: bool = True):
         """Delete document by id"""
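
The dict-then-reorder change above restores the caller's score ordering regardless of the order LanceDB returns rows in; a tiny self-contained illustration with plain dicts:

    ids = ["b", "a"]  # ordered by relevance score
    rows = [{"id": "a"}, {"id": "b"}]  # arbitrary storage order
    by_id = {row["id"]: row for row in rows}
    ordered = [by_id[i] for i in ids if i in by_id]
    assert [r["id"] for r in ordered] == ["b", "a"]
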
@@ -36,6 +36,7 @@ dependencies = [
     "langchain-google-genai>=1.0.3,<2.0.0",
     "langchain-anthropic",
     "langchain-ollama",
+    "langchain-mistralai",
    "langchain-cohere>=0.2.4,<0.3.0",
    "llama-hub>=0.0.79,<0.1.0",
    "llama-index>=0.10.40,<0.11.0",
@@ -89,6 +90,7 @@ adv = [
    "tabulate",
    "unstructured>=0.15.8,<0.16",
    "wikipedia>=1.4.0,<1.5",
+    "voyageai>=0.3.0",
 ]
 dev = [
    "black",

@@ -70,6 +70,15 @@ def if_llama_cpp_not_installed():
         return False


+def if_voyageai_not_installed():
+    try:
+        import voyageai  # noqa: F401
+    except ImportError:
+        return True
+    else:
+        return False
+
+
 skip_when_haystack_not_installed = pytest.mark.skipif(
     if_haystack_not_installed(), reason="Haystack is not installed"
 )
@@ -97,3 +106,7 @@ skip_openai_lc_wrapper_test = pytest.mark.skipif(
 skip_llama_cpp_not_installed = pytest.mark.skipif(
     if_llama_cpp_not_installed(), reason="llama_cpp is not installed"
 )
+
+skip_when_voyageai_not_installed = pytest.mark.skipif(
+    if_voyageai_not_installed(), reason="voyageai is not installed"
+)

@@ -1,22 +1,24 @@
 import json
 from pathlib import Path
-from unittest.mock import patch
+from unittest.mock import Mock, patch

 from openai.types.create_embedding_response import CreateEmbeddingResponse

-from kotaemon.base import Document
+from kotaemon.base import Document, DocumentWithEmbedding
 from kotaemon.embeddings import (
     AzureOpenAIEmbeddings,
     FastEmbedEmbeddings,
     LCCohereEmbeddings,
     LCHuggingFaceEmbeddings,
     OpenAIEmbeddings,
+    VoyageAIEmbeddings,
 )

 from .conftest import (
     skip_when_cohere_not_installed,
     skip_when_fastembed_not_installed,
     skip_when_sentence_bert_not_installed,
+    skip_when_voyageai_not_installed,
 )

 with open(Path(__file__).parent / "resources" / "embedding_openai_batch.json") as f:
@@ -155,3 +157,16 @@ def test_fastembed_embeddings():
     model = FastEmbedEmbeddings()
     output = model("Hello World")
     assert_embedding_result(output)
+
+
+voyage_output_mock = Mock()
+voyage_output_mock.embeddings = [[1.0, 2.1, 3.2]]
+
+
+@skip_when_voyageai_not_installed
+@patch("voyageai.Client.embed", return_value=voyage_output_mock)
+@patch("voyageai.AsyncClient.embed", return_value=voyage_output_mock)
+def test_voyageai_embeddings(sync_call, async_call):
+    model = VoyageAIEmbeddings(api_key="test")
+    output = model("Hello, world!")
+    assert all(isinstance(doc, DocumentWithEmbedding) for doc in output)

@@ -39,7 +39,7 @@ class BaseApp:

     def __init__(self):
         self.dev_mode = getattr(settings, "KH_MODE", "") == "dev"
-        self.app_name = getattr(settings, "KH_APP_NAME", "Kotaemon")
+        self.app_name = getattr(settings, "KH_APP_NAME", "DatallChat")
         self.app_version = getattr(settings, "KH_APP_VERSION", "")
         self.f_user_management = getattr(settings, "KH_FEATURE_USER_MANAGEMENT", False)
         self._theme = KotaemonTheme()

@@ -1,7 +1,10 @@
 :root {
   --main-area-height: calc(100vh - 110px);
+  direction: rtl;
 }


+
 /* no footer */
 footer {
   display: none !important;
@@ -27,6 +30,11 @@ footer {
   height: 100% !important; */
 }

+input[type="radio"] {
+  margin-left: 5px;
+}
+
+.gradio-container
 /* styling for header bar */
 .header-bar {
   background-color: transparent;
@@ -168,6 +176,27 @@ mark {
   color: var(--body-text-color);
 }

+#chat-input textarea {
+  direction: rtl;
+}
+#chat-input button.submit-button {
+  margin-left: 3px;
+  margin-right: 3px;
+  transform: scaleX(-1);
+  -moz-transform: scaleX(-1);
+  -webkit-transform: scaleX(-1);
+}
+
+.secondary-wrap {
+  position: relative;
+}
+
+.secondary-wrap .icon-wrap {
+  /* direction: ltr; */
+  position: absolute;
+  right: 90%;
+}
+
 /* for setting right-aligned buttons */
 .right-button {
   min-width: 200px !important;
@@ -195,13 +224,13 @@ mark {
 #toggle-dark-button {
   position: fixed;
   top: 6px;
-  right: 30px;
+  left: 30px;
 }

 #info-expand-button {
   position: absolute;
   top: 6px;
-  right: 15px;
+  left: 30px;
 }

 /* prevent overflow of html info panel */
@@ -212,7 +241,7 @@ mark {
 #chat-expand-button {
   position: absolute;
   top: 6px;
-  right: -10px;
+  left: -10px;
   z-index: 1;
 }

@@ -231,14 +260,14 @@ mark {
   position: absolute;
   width: 110px;
   top: 10px;
-  right: 25px;
+  left: 15px;
 }

 #citation-dropdown {
   width: min(25%, 100px);
   position: absolute;
   top: 2px;
-  left: 120px;
+  right: 120px;
   height: 35px;
 }

@@ -377,9 +406,16 @@ pdfjs-viewer-element {

 /* Bot animation */
-.message.bot {
-  animation: fadein 1.0s ease-in-out forwards;
-}
+
+
+/* .message.bot button{
+  text-align: right;
+  background-color: blue;
+  direction: rtl !important;
+} */

 details.evidence {
   animation: fadein 0.3s ease-in-out forwards;

@@ -21,7 +21,7 @@ function run() {

     // setup conversation dropdown placeholder
     let conv_dropdown = document.querySelector("#conversation-dropdown input");
-    conv_dropdown.placeholder = "Browse conversation";
+    conv_dropdown.placeholder = "مرور گفتگو";

     // move info-expand-button
     let info_expand_button = document.getElementById("info-expand-button");

@@ -59,8 +59,10 @@ class EmbeddingManager:
             LCCohereEmbeddings,
             LCGoogleEmbeddings,
             LCHuggingFaceEmbeddings,
+            LCMistralEmbeddings,
             OpenAIEmbeddings,
             TeiEndpointEmbeddings,
+            VoyageAIEmbeddings,
         )

         self._vendors = [
@@ -70,7 +72,9 @@ class EmbeddingManager:
             LCCohereEmbeddings,
             LCHuggingFaceEmbeddings,
             LCGoogleEmbeddings,
+            LCMistralEmbeddings,
             TeiEndpointEmbeddings,
+            VoyageAIEmbeddings,
         ]

     def __getitem__(self, key: str) -> BaseEmbeddings:

@ -40,8 +40,8 @@ chat_input_focus_js_with_submit = """
|
||||||
function() {
|
function() {
|
||||||
let chatInput = document.querySelector("#chat-input textarea");
|
let chatInput = document.querySelector("#chat-input textarea");
|
||||||
let chatInputSubmit = document.querySelector("#chat-input button.submit-button");
|
let chatInputSubmit = document.querySelector("#chat-input button.submit-button");
|
||||||
chatInputSubmit.click();
|
|
||||||
chatInput.focus();
|
chatInput.focus();
|
||||||
|
chatInputSubmit.click();
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@@ -1059,15 +1059,18 @@ class FileIndexPage(BasePage):
         """Handle zip files"""
         zip_files = [file for file in files if file.endswith(".zip")]
         remaining_files = [file for file in files if not file.endswith("zip")]
+        errors: list[str] = []

         # Clean-up <zip_dir> before unzip to remove old files
         shutil.rmtree(zip_dir, ignore_errors=True)

+        # Unzip
         for zip_file in zip_files:
             # Prepare new zip output dir, separated for each files
             basename = os.path.splitext(os.path.basename(zip_file))[0]
             zip_out_dir = os.path.join(zip_dir, basename)
             os.makedirs(zip_out_dir, exist_ok=True)

             with zipfile.ZipFile(zip_file, "r") as zip_ref:
                 zip_ref.extractall(zip_out_dir)

@@ -1084,7 +1087,7 @@ class FileIndexPage(BasePage):
         if n_zip_file > 0:
             print(f"Update zip files: {n_zip_file}")

-        return remaining_files
+        return remaining_files, errors

     def index_fn(
         self, files, urls, reindex: bool, settings, user_id
@@ -1100,16 +1103,18 @@ class FileIndexPage(BasePage):
         """
         if urls:
             files = [it.strip() for it in urls.split("\n")]
-            errors = []
+            errors = self.validate_urls(files)
         else:
             if not files:
                 gr.Info("No uploaded file")
                 yield "", ""
                 return
+            files, unzip_errors = self._may_extract_zip(
+                files, flowsettings.KH_ZIP_INPUT_DIR
+            )
+            errors = self.validate_files(files)
+            errors.extend(unzip_errors)

-        files = self._may_extract_zip(files, flowsettings.KH_ZIP_INPUT_DIR)
-
-        errors = self.validate(files)
-
         if errors:
             gr.Warning(", ".join(errors))
             yield "", ""
@@ -1569,7 +1574,7 @@ class FileIndexPage(BasePage):
             selected_item["files"],
         )

-    def validate(self, files: list[str]):
+    def validate_files(self, files: list[str]):
         """Validate if the files are valid"""
         paths = [Path(file) for file in files]
         errors = []
@@ -1598,6 +1603,14 @@ class FileIndexPage(BasePage):

         return errors

+    def validate_urls(self, urls: list[str]):
+        """Validate if the urls are valid"""
+        errors = []
+        for url in urls:
+            if not url.startswith("http") and not url.startswith("https"):
+                errors.append(f"Invalid url `{url}`")
+        return errors
+

 class FileSelector(BasePage):
     """File selector UI in the Chat page"""
@@ -1618,8 +1631,8 @@ class FileSelector(BasePage):
         self.mode = gr.Radio(
             value=default_mode,
             choices=[
-                ("Search All", "all"),
-                ("Search In File(s)", "select"),
+                (" جستجو همگانی ", "all"),
+                (" جستجو در فایل ها ", "select"),
             ],
             container=False,
         )

@@ -48,12 +48,12 @@ class App(BaseApp):
             from ktem.pages.login import LoginPage

             with gr.Tab(
-                "Welcome", elem_id="login-tab", id="login-tab"
+                "خوش آمدید", elem_id="login-tab", id="login-tab"
             ) as self._tabs["login-tab"]:
                 self.login_page = LoginPage(self)

         with gr.Tab(
-            "Chat",
+            "گفتگو",
             elem_id="chat-tab",
             id="chat-tab",
             visible=not self.f_user_management,
@@ -77,7 +77,7 @@ class App(BaseApp):
                 setattr(self, f"_index_{index.id}", page)
         elif len(self.index_manager.indices) > 1:
             with gr.Tab(
-                "Files",
+                "فایل ها",
                 elem_id="indices-tab",
                 elem_classes=["fill-main-area-height", "scrollable", "indices-tab"],
                 id="indices-tab",
@@ -94,7 +94,7 @@ class App(BaseApp):
         if not KH_DEMO_MODE:
             if not KH_SSO_ENABLED:
                 with gr.Tab(
-                    "Resources",
+                    "منابع",
                     elem_id="resources-tab",
                     id="resources-tab",
                     visible=not self.f_user_management,
@@ -103,7 +103,7 @@ class App(BaseApp):
                     self.resources_page = ResourcesTab(self)

             with gr.Tab(
-                "Settings",
+                "تنظیمات",
                 elem_id="settings-tab",
                 id="settings-tab",
                 visible=not self.f_user_management,
@@ -112,7 +112,7 @@ class App(BaseApp):
                 self.settings_page = SettingsPage(self)

             with gr.Tab(
-                "Help",
+                "راهنما",
                 elem_id="help-tab",
                 id="help-tab",
                 visible=not self.f_user_management,

@@ -272,7 +272,7 @@ class ChatPage(BasePage):

                 if len(self._app.index_manager.indices) > 0:
                     quick_upload_label = (
-                        "Quick Upload" if not KH_DEMO_MODE else "Or input new paper URL"
+                        "بارگذاری" if not KH_DEMO_MODE else "Or input new paper URL"
                     )

                     with gr.Accordion(label=quick_upload_label) as _:
@@ -287,9 +287,9 @@ class ChatPage(BasePage):
                         )
                         self.quick_urls = gr.Textbox(
                             placeholder=(
-                                "Or paste URLs"
+                                "یا آدرس وب جایگذاری کنید"
                                 if not KH_DEMO_MODE
-                                else "Paste Arxiv URLs\n(https://arxiv.org/abs/xxx)"
+                                else "آدرس وب جایگذاری کنید\n(https://arxiv.org/abs/xxx)"
                             ),
                             lines=1,
                             container=False,
@@ -314,17 +314,17 @@ class ChatPage(BasePage):
                 self.chat_panel = ChatPanel(self._app)

                 with gr.Accordion(
-                    label="Chat settings",
+                    label="تنظیمات گفتگو",
                     elem_id="chat-settings-expand",
                     open=False,
                     visible=not KH_DEMO_MODE,
                 ) as self.chat_settings:
                     with gr.Row(elem_id="quick-setting-labels"):
-                        gr.HTML("Reasoning method")
+                        gr.HTML("روش استدلال")
                         gr.HTML(
-                            "Model", visible=not KH_DEMO_MODE and not KH_SSO_ENABLED
+                            "مدل", visible=not KH_DEMO_MODE and not KH_SSO_ENABLED
                         )
-                        gr.HTML("Language")
+                        gr.HTML("زبان")

                     with gr.Row():
                         reasoning_setting = (
@@ -372,7 +372,7 @@ class ChatPage(BasePage):
                 if not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool):
                     self.use_mindmap = gr.State(value=True)
                     self.use_mindmap_check = gr.Checkbox(
-                        label="Mindmap (on)",
+                        label="نقشه ذهنی روشن",
                         container=False,
                         elem_id="use-mindmap-checkbox",
                         value=True,
@@ -380,7 +380,7 @@ class ChatPage(BasePage):
                 else:
                     self.use_mindmap = gr.State(value=False)
                     self.use_mindmap_check = gr.Checkbox(
-                        label="Mindmap (off)",
+                        label="نقشه ذهنی خاموش",
                         container=False,
                         elem_id="use-mindmap-checkbox",
                         value=False,
@@ -390,7 +390,7 @@ class ChatPage(BasePage):
                 scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel"
             ) as self.info_column:
                 with gr.Accordion(
-                    label="Information panel", open=True, elem_id="info-expand"
+                    label="پنل اطلاعات", open=True, elem_id="info-expand"
                 ):
                     self.modal = gr.HTML("<div id='pdf-modal'></div>")
                     self.plot_panel = gr.Plot(visible=False)

@ -6,15 +6,15 @@ KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
|
||||||
|
|
||||||
if not KH_DEMO_MODE:
|
if not KH_DEMO_MODE:
|
||||||
PLACEHOLDER_TEXT = (
|
PLACEHOLDER_TEXT = (
|
||||||
"This is the beginning of a new conversation.\n"
|
".این شروع یک گفتگوی جدید است\n"
|
||||||
"Start by uploading a file or a web URL. "
|
".با بارگذاری یک فایل یا یک آدرس وب شروع کنید\n "
|
||||||
"Visit Files tab for more options (e.g: GraphRAG)."
|
".برای گزینه های بیشتر به برگه فایل ها مراجعه کنید "
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
PLACEHOLDER_TEXT = (
|
PLACEHOLDER_TEXT = (
|
||||||
"Welcome to Kotaemon Demo. "
|
".به دموی دیتال چت خوش آمدید \n"
|
||||||
"Start by browsing preloaded conversations to get onboard.\n"
|
".برای شروع، مکالمات قبلی بارگذاری شده را مرور کنید\n"
|
||||||
"Check out Hint section for more tips."
|
".برای نکات بیشتر به بخش راهنمایی مراجعه کنید"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ class ChatPanel(BasePage):
|
||||||
label=self._app.app_name,
|
label=self._app.app_name,
|
||||||
placeholder=PLACEHOLDER_TEXT,
|
placeholder=PLACEHOLDER_TEXT,
|
||||||
show_label=False,
|
show_label=False,
|
||||||
|
rtl = True,
|
||||||
elem_id="main-chat-bot",
|
elem_id="main-chat-bot",
|
||||||
show_copy_button=True,
|
show_copy_button=True,
|
||||||
likeable=True,
|
likeable=True,
|
||||||
|
@ -37,9 +38,10 @@ class ChatPanel(BasePage):
|
||||||
self.text_input = gr.MultimodalTextbox(
|
self.text_input = gr.MultimodalTextbox(
|
||||||
interactive=True,
|
interactive=True,
|
||||||
scale=20,
|
scale=20,
|
||||||
|
rtl=True,
|
||||||
file_count="multiple",
|
file_count="multiple",
|
||||||
placeholder=(
|
placeholder=(
|
||||||
"Type a message, search the @web, or tag a file with @filename"
|
"یک پیام بنویسید"
|
||||||
),
|
),
|
||||||
container=False,
|
container=False,
|
||||||
show_label=False,
|
show_label=False,
|
||||||
|
|
|
@@ -51,8 +51,8 @@ class ConversationControl(BasePage):

     def on_building_ui(self):
         with gr.Row():
-            title_text = "Conversations" if not KH_DEMO_MODE else "Kotaemon Papers"
-            gr.Markdown("## {}".format(title_text))
+            title_text = "گفتگو ها" if not KH_DEMO_MODE else "Kotaemon Papers"
+            gr.Markdown(f'<div dir="rtl"> {title_text}</div>')
             self.btn_toggle_dark_mode = gr.Button(
                 value="",
                 icon=f"{ASSETS_DIR}/dark_mode.svg",
@@ -66,7 +66,7 @@ class ConversationControl(BasePage):
                 icon=f"{ASSETS_DIR}/expand.svg",
                 scale=1,
                 size="sm",
-                elem_classes=["no-background", "body-text-color"],
+                elem_classes=["no-background", "body-text-color", "top-left-button"],
                 elem_id="chat-expand-button",
             )
             self.btn_info_expand = gr.Button(
@@ -75,7 +75,7 @@ class ConversationControl(BasePage):
                 min_width=2,
                 scale=1,
                 size="sm",
-                elem_classes=["no-background", "body-text-color"],
+                elem_classes=["no-background", "body-text-color", "top-left-button"],
                 elem_id="info-expand-button",
             )

@@ -102,7 +102,7 @@ class ConversationControl(BasePage):
         with gr.Row() as self._new_delete:
             self.cb_suggest_chat = gr.Checkbox(
                 value=False,
-                label="Suggest chat",
+                label=" پیشنهاد گفتگو ",
                 min_width=10,
                 scale=6,
                 elem_id="suggest-chat-checkbox",
@@ -111,7 +111,7 @@ class ConversationControl(BasePage):
             )
             self.cb_is_public = gr.Checkbox(
                 value=False,
-                label="Share this conversation",
+                label="این گفتگو را ارسال کن",
                 elem_id="is-public-checkbox",
                 container=False,
                 visible=not KH_DEMO_MODE and not KH_SSO_ENABLED,

@@ -12,34 +12,31 @@ class ReportIssue(BasePage):
         self.on_building_ui()

     def on_building_ui(self):
-        with gr.Accordion(label="Feedback", open=False, elem_id="report-accordion"):
+        with gr.Accordion(label="بازخورد", open=False, elem_id="report-accordion"):
             self.correctness = gr.Radio(
                 choices=[
-                    ("The answer is correct", "correct"),
-                    ("The answer is incorrect", "incorrect"),
+                    (" پاسخ صحیح است ", "correct"),
+                    (" پاسخ اشتباه است ", "incorrect"),
                 ],
-                label="Correctness:",
+                label="صحت سنجی:",
             )
             self.issues = gr.CheckboxGroup(
                 choices=[
-                    ("The answer is offensive", "offensive"),
-                    ("The evidence is incorrect", "wrong-evidence"),
+                    (" پاسخ نامحترمانه است ", "offensive"),
+                    (" مدارک اشتباه است ", "wrong-evidence"),
                 ],
-                label="Other issue:",
+                label="دیگر مشکلات:",
             )
             self.more_detail = gr.Textbox(
                 placeholder=(
-                    "More detail (e.g. how wrong is it, what is the "
-                    "correct answer, etc...)"
+                    "جزئیات بیشتر (مثلا چقدر اشتباه است، پاسخ صحیح چیست، و غیره...)"
                 ),
                 container=False,
                 lines=3,
             )
-            gr.Markdown(
-                "This will send the current chat and the user settings to "
-                "help with investigation"
-            )
-            self.report_btn = gr.Button("Report")
+            alert_text = "این عمل، چت فعلی و تنظیمات کاربر را برای کمک به تحقیق ارسال خواهد کرد"
+            gr.Markdown(f'<div dir="rtl"> {alert_text}</div>')
+            self.report_btn = gr.Button("گزارش")

     def report(
         self,
@@ -83,4 +80,4 @@ class ReportIssue(BasePage):
         )
         session.add(issue)
         session.commit()
-        gr.Info("Thank you for your feedback")
+        gr.Info("از بازخورد شما متشکریم")

@@ -52,9 +52,13 @@ class RerankingManager:
                 self._default = item.name

     def load_vendors(self):
-        from kotaemon.rerankings import CohereReranking, TeiFastReranking
+        from kotaemon.rerankings import (
+            CohereReranking,
+            TeiFastReranking,
+            VoyageAIReranking,
+        )

-        self._vendors = [TeiFastReranking, CohereReranking]
+        self._vendors = [TeiFastReranking, CohereReranking, VoyageAIReranking]

     def __getitem__(self, key: str) -> BaseReranking:
         """Get model by name"""

@@ -1,5 +1,6 @@
 SUPPORTED_LANGUAGE_MAP = {
     "en": "English",
+    "fa": "Persian",
     "ja": "Japanese",
     "vi": "Vietnamese",
     "es": "Spanish",