feat: add first setup screen for LLM & Embedding models (#314) (bump:minor)
* fix: utf-8 txt reader
* fix: revise vectorstore import and make it optional
* feat: add cohere chat model with tool call support
* fix: simplify citation pipeline
* fix: improve citation logic
* fix: improve decompose func call
* fix: revise question rewrite prompt
* fix: revise chat box default placeholder
* fix: add key from ktem to cohere rerank
* fix: conv name suggestion
* fix: ignore default key cohere rerank
* fix: improve test connection UI
* fix: reorder requirements
* feat: add first setup screen
* fix: update requirements
* fix: vectorstore tests
* fix: update cohere version
* fix: relax langchain core version
* fix: add demo mode
* fix: update flowsettings
* fix: typo
* fix: fix bool env passing
Parent: 0bdb9a32f2
Commit: 88d577b0cc
@@ -24,9 +24,13 @@ if not KH_APP_VERSION:
     except Exception:
         KH_APP_VERSION = "local"

+KH_ENABLE_FIRST_SETUP = True
+KH_DEMO_MODE = config("KH_DEMO_MODE", default=False, cast=bool)
+
 # App can be ran from anywhere and it's not trivial to decide where to store app data.
 # So let's use the same directory as the flowsetting.py file.
 KH_APP_DATA_DIR = this_dir / "ktem_app_data"
+KH_APP_DATA_EXISTS = KH_APP_DATA_DIR.exists()
 KH_APP_DATA_DIR.mkdir(parents=True, exist_ok=True)

 # User data directory
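A note on the `cast=bool` usage above (this is what the "fix bool env passing" item in the commit message refers to): a minimal sketch, assuming `config` comes from python-decouple as it does elsewhere in this commit, of how string values from the environment are coerced to real booleans.

```python
# Minimal sketch, assuming python-decouple's `config` (imported as in ktem/app.py below).
import os

from decouple import config

os.environ["KH_DEMO_MODE"] = "false"
# Without cast=bool the value would be the truthy string "false";
# with cast=bool it becomes a real boolean False.
print(config("KH_DEMO_MODE", default=False, cast=bool))  # -> False
```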
@@ -59,7 +63,9 @@ os.environ["HF_HUB_CACHE"] = str(KH_APP_DATA_DIR / "huggingface")
 KH_DOC_DIR = this_dir / "docs"

 KH_MODE = "dev"
-KH_FEATURE_USER_MANAGEMENT = True
+KH_FEATURE_USER_MANAGEMENT = config(
+    "KH_FEATURE_USER_MANAGEMENT", default=True, cast=bool
+)
 KH_USER_CAN_SEE_PUBLIC = None
 KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
     config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
@@ -202,6 +208,14 @@ KH_LLMS["groq"] = {
     },
     "default": False,
 }
+KH_LLMS["cohere"] = {
+    "spec": {
+        "__type__": "kotaemon.llms.chats.LCCohereChat",
+        "model_name": "command-r-plus-08-2024",
+        "api_key": "your-key",
+    },
+    "default": False,
+}

 # additional embeddings configurations
 KH_EMBEDDINGS["cohere"] = {
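For orientation, a purely illustrative sketch (not the actual ktem loader) of how a `KH_LLMS` entry like the one above maps onto a class: `__type__` names the class, and the remaining spec keys become constructor arguments.

```python
# Illustrative only -- not the ktem resolver itself.
from importlib import import_module


def build_from_spec(spec: dict):
    module_path, _, class_name = spec["__type__"].rpartition(".")
    cls = getattr(import_module(module_path), class_name)
    kwargs = {k: v for k, v in spec.items() if k != "__type__"}
    return cls(**kwargs)


# e.g. build_from_spec(KH_LLMS["cohere"]["spec"]) would construct an LCCohereChat.
```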
@@ -183,7 +183,7 @@ class LCCohereEmbeddings(LCEmbeddingMixin, BaseEmbeddings):

     def _get_lc_class(self):
         try:
-            from langchain_community.embeddings import CohereEmbeddings
+            from langchain_cohere import CohereEmbeddings
         except ImportError:
             from langchain.embeddings import CohereEmbeddings

@@ -1,4 +1,4 @@
-from typing import Iterator, List
+from typing import List

 from pydantic import BaseModel, Field

@@ -7,53 +7,14 @@ from kotaemon.base.schema import HumanMessage, SystemMessage
 from kotaemon.llms import BaseLLM


-class FactWithEvidence(BaseModel):
-    """Class representing a single statement.
-
-    Each fact has a body and a list of sources.
-    If there are multiple facts make sure to break them apart
-    such that each one only uses a set of sources that are relevant to it.
-    """
-
-    fact: str = Field(..., description="Body of the sentence, as part of a response")
-    substring_quote: List[str] = Field(
+class CiteEvidence(BaseModel):
+    """List of evidences (maximum 5) to support the answer."""
+
+    evidences: List[str] = Field(
         ...,
         description=(
             "Each source should be a direct quote from the context, "
-            "as a substring of the original content"
+            "as a substring of the original content (max 15 words)."
         ),
     )
-
-    def _get_span(self, quote: str, context: str, errs: int = 100) -> Iterator[str]:
-        import regex
-
-        minor = quote
-        major = context
-
-        errs_ = 0
-        s = regex.search(f"({minor}){{e<={errs_}}}", major)
-        while s is None and errs_ <= errs:
-            errs_ += 1
-            s = regex.search(f"({minor}){{e<={errs_}}}", major)
-
-        if s is not None:
-            yield from s.spans()
-
-    def get_spans(self, context: str) -> Iterator[str]:
-        for quote in self.substring_quote:
-            yield from self._get_span(quote, context)
-
-
-class QuestionAnswer(BaseModel):
-    """A question and its answer as a list of facts each one should have a source.
-    each sentence contains a body and a list of sources."""
-
-    question: str = Field(..., description="Question that was asked")
-    answer: List[FactWithEvidence] = Field(
-        ...,
-        description=(
-            "Body of the answer, each fact should be "
-            "its separate object with a body and a list of sources"
-        ),
-    )

@@ -68,7 +29,7 @@ class CitationPipeline(BaseComponent):
         return self.invoke(context, question)

     def prepare_llm(self, context: str, question: str):
-        schema = QuestionAnswer.schema()
+        schema = CiteEvidence.schema()
         function = {
             "name": schema["title"],
             "description": schema["description"],
@@ -76,7 +37,8 @@ class CitationPipeline(BaseComponent):
         }
         llm_kwargs = {
             "tools": [{"type": "function", "function": function}],
-            "tool_choice": "auto",
+            "tool_choice": "required",
+            "tools_pydantic": [CiteEvidence],
         }
         messages = [
             SystemMessage(
@@ -85,7 +47,12 @@ class CitationPipeline(BaseComponent):
                     "questions with correct and exact citations."
                 )
             ),
-            HumanMessage(content="Answer question using the following context"),
+            HumanMessage(
+                content=(
+                    "Answer question using the following context. "
+                    "Use the provided function CiteEvidence() to cite your sources."
+                )
+            ),
             HumanMessage(content=context),
             HumanMessage(content=f"Question: {question}"),
             HumanMessage(
@@ -103,14 +70,24 @@ class CitationPipeline(BaseComponent):
             print("CitationPipeline: invoking LLM")
             llm_output = self.get_from_path("llm").invoke(messages, **llm_kwargs)
             print("CitationPipeline: finish invoking LLM")
-            if not llm_output.messages or not llm_output.additional_kwargs.get(
-                "tool_calls"
-            ):
+            if not llm_output.additional_kwargs.get("tool_calls"):
                 return None
-            function_output = llm_output.additional_kwargs["tool_calls"][0]["function"][
-                "arguments"
-            ]
-            output = QuestionAnswer.parse_raw(function_output)
+
+            first_func = llm_output.additional_kwargs["tool_calls"][0]
+            if "function" in first_func:
+                # openai and cohere format
+                function_output = first_func["function"]["arguments"]
+            else:
+                # anthropic format
+                function_output = first_func["args"]
+
+            print("CitationPipeline:", function_output)
+
+            if isinstance(function_output, str):
+                output = CiteEvidence.parse_raw(function_output)
+            else:
+                output = CiteEvidence.parse_obj(function_output)
         except Exception as e:
             print(e)
             return None
@@ -118,18 +95,4 @@ class CitationPipeline(BaseComponent):
         return output

     async def ainvoke(self, context: str, question: str):
-        messages, llm_kwargs = self.prepare_llm(context, question)
-
-        try:
-            print("CitationPipeline: async invoking LLM")
-            llm_output = await self.get_from_path("llm").ainvoke(messages, **llm_kwargs)
-            print("CitationPipeline: finish async invoking LLM")
-            function_output = llm_output.additional_kwargs["tool_calls"][0]["function"][
-                "arguments"
-            ]
-            output = QuestionAnswer.parse_raw(function_output)
-        except Exception as e:
-            print(e)
-            return None
-
-        return output
+        raise NotImplementedError()
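The rewritten `invoke` above accepts two tool-call payload shapes. A small, self-contained sketch of that branching (the sample payloads are made up; only the key names follow the code):

```python
import json

# OpenAI/Cohere style: arguments arrive as a JSON string under "function".
openai_style = {"function": {"arguments": '{"evidences": ["a direct quote"]}'}}
# Anthropic style: arguments arrive as an already-parsed dict under "args".
anthropic_style = {"args": {"evidences": ["a direct quote"]}}

for first_func in (openai_style, anthropic_style):
    if "function" in first_func:
        function_output = first_func["function"]["arguments"]
    else:
        function_output = first_func["args"]
    data = (
        json.loads(function_output)
        if isinstance(function_output, str)
        else function_output
    )
    print(data["evidences"])
```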
@@ -10,6 +10,7 @@ from .base import BaseReranking
 class CohereReranking(BaseReranking):
     model_name: str = "rerank-multilingual-v2.0"
     cohere_api_key: str = config("COHERE_API_KEY", "")
+    use_key_from_ktem: bool = False

     def run(self, documents: list[Document], query: str) -> list[Document]:
         """Use Cohere Reranker model to re-order documents
@@ -18,9 +19,25 @@ class CohereReranking(BaseReranking):
             import cohere
         except ImportError:
             raise ImportError(
-                "Please install Cohere " "`pip install cohere` to use Cohere Reranking"
+                "Please install Cohere `pip install cohere` to use Cohere Reranking"
             )

+        # try to get COHERE_API_KEY from embeddings
+        if not self.cohere_api_key and self.use_key_from_ktem:
+            try:
+                from ktem.embeddings.manager import (
+                    embedding_models_manager as embeddings,
+                )
+
+                cohere_model = embeddings.get("cohere")
+                ktem_cohere_api_key = cohere_model._kwargs.get(  # type: ignore
+                    "cohere_api_key"
+                )
+                if ktem_cohere_api_key != "your-key":
+                    self.cohere_api_key = ktem_cohere_api_key
+            except Exception as e:
+                print("Cannot get Cohere API key from `ktem`", e)
+
         if not self.cohere_api_key:
             print("Cohere API key not found. Skipping reranking.")
             return documents
@@ -35,7 +52,7 @@ class CohereReranking(BaseReranking):
         response = cohere_client.rerank(
             model=self.model_name, query=query, documents=_docs
         )
-        print("Cohere score", [r.relevance_score for r in response.results])
+        # print("Cohere score", [r.relevance_score for r in response.results])
         for r in response.results:
             doc = documents[r.index]
             doc.metadata["cohere_reranking_score"] = r.relevance_score
@@ -10,6 +10,7 @@ from .chats import (
     LCAnthropicChat,
     LCAzureChatOpenAI,
     LCChatOpenAI,
+    LCCohereChat,
     LCGeminiChat,
     LlamaCppChat,
 )
@@ -31,6 +32,7 @@ __all__ = [
     "ChatOpenAI",
     "LCAnthropicChat",
     "LCGeminiChat",
+    "LCCohereChat",
     "LCAzureChatOpenAI",
     "LCChatOpenAI",
     "LlamaCppChat",
@@ -5,6 +5,7 @@ from .langchain_based import (
     LCAzureChatOpenAI,
     LCChatMixin,
     LCChatOpenAI,
+    LCCohereChat,
     LCGeminiChat,
 )
 from .llamacpp import LlamaCppChat
@@ -18,6 +19,7 @@ __all__ = [
     "ChatOpenAI",
     "LCAnthropicChat",
     "LCGeminiChat",
+    "LCCohereChat",
     "LCChatOpenAI",
     "LCAzureChatOpenAI",
     "LCChatMixin",
@@ -18,6 +18,9 @@ class LCChatMixin:
             "Please return the relevant Langchain class in in _get_lc_class"
         )

+    def _get_tool_call_kwargs(self):
+        return {}
+
     def __init__(self, stream: bool = False, **params):
         self._lc_class = self._get_lc_class()
         self._obj = self._lc_class(**params)
@@ -56,9 +59,7 @@ class LCChatMixin:
             total_tokens = pred.llm_output["token_usage"]["total_tokens"]
             prompt_tokens = pred.llm_output["token_usage"]["prompt_tokens"]
         except Exception:
-            logger.warning(
-                f"Cannot get token usage from LLM output for {self._lc_class.__name__}"
-            )
+            pass

         return LLMInterface(
             text=all_text[0] if len(all_text) > 0 else "",
@@ -83,8 +84,30 @@ class LCChatMixin:
             LLMInterface: generated response
         """
         input_ = self.prepare_message(messages)
-        pred = self._obj.generate(messages=[input_], **kwargs)
-        return self.prepare_response(pred)
+
+        if "tools_pydantic" in kwargs:
+            tools = kwargs.pop(
+                "tools_pydantic",
+            )
+            lc_tool_call = self._obj.bind_tools(tools)
+            pred = lc_tool_call.invoke(
+                input_,
+                **self._get_tool_call_kwargs(),
+            )
+            if pred.tool_calls:
+                tool_calls = pred.tool_calls
+            else:
+                tool_calls = pred.additional_kwargs.get("tool_calls", [])
+
+            output = LLMInterface(
+                content="",
+                additional_kwargs={"tool_calls": tool_calls},
+            )
+        else:
+            pred = self._obj.generate(messages=[input_], **kwargs)
+            output = self.prepare_response(pred)
+
+        return output

     async def ainvoke(
         self, messages: str | BaseMessage | list[BaseMessage], **kwargs
@@ -235,6 +258,9 @@ class LCAnthropicChat(LCChatMixin, ChatLLM):  # type: ignore
         required=True,
     )

+    def _get_tool_call_kwargs(self):
+        return {"tool_choice": {"type": "any"}}
+
     def __init__(
         self,
         api_key: str | None = None,
@@ -291,3 +317,35 @@ class LCGeminiChat(LCChatMixin, ChatLLM):  # type: ignore
             raise ImportError("Please install langchain-google-genai")

         return ChatGoogleGenerativeAI
+
+
+class LCCohereChat(LCChatMixin, ChatLLM):  # type: ignore
+    api_key: str = Param(
+        help="API key (https://dashboard.cohere.com/api-keys)", required=True
+    )
+    model_name: str = Param(
+        help=("Model name to use (https://dashboard.cohere.com/playground/chat)"),
+        required=True,
+    )
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model_name: str | None = None,
+        temperature: float = 0.7,
+        **params,
+    ):
+        super().__init__(
+            cohere_api_key=api_key,
+            model_name=model_name,
+            temperature=temperature,
+            **params,
+        )
+
+    def _get_lc_class(self):
+        try:
+            from langchain_cohere import ChatCohere
+        except ImportError:
+            raise ImportError("Please install langchain-cohere")
+
+        return ChatCohere
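A hedged usage sketch of the new `tools_pydantic` path and the `LCCohereChat` class added above; the model name and API key are placeholders, and the pydantic schema mirrors `CiteEvidence` from the citation pipeline.

```python
from typing import List

from pydantic import BaseModel, Field

from kotaemon.llms import LCCohereChat  # exported by this commit


class CiteEvidence(BaseModel):
    """List of evidences (maximum 5) to support the answer."""

    evidences: List[str] = Field(...)


llm = LCCohereChat(api_key="<cohere-api-key>", model_name="command-r-plus-08-2024")
result = llm.invoke(
    "Answer the question using the context and cite your sources.",
    tools_pydantic=[CiteEvidence],  # routed through LangChain's bind_tools() by LCChatMixin
)
tool_calls = result.additional_kwargs.get("tool_calls", [])
```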
@@ -292,6 +292,9 @@ class ChatOpenAI(BaseChatOpenAI):

     def openai_response(self, client, **kwargs):
         """Get the openai response"""
+        if "tools_pydantic" in kwargs:
+            kwargs.pop("tools_pydantic")
+
         params_ = {
             "model": self.model,
             "temperature": self.temperature,
@@ -360,6 +363,9 @@ class AzureChatOpenAI(BaseChatOpenAI):

     def openai_response(self, client, **kwargs):
         """Get the openai response"""
+        if "tools_pydantic" in kwargs:
+            kwargs.pop("tools_pydantic")
+
         params_ = {
             "model": self.azure_deployment,
             "temperature": self.temperature,
@@ -15,7 +15,7 @@ class TxtReader(BaseReader):
     def load_data(
         self, file_path: Path, extra_info: Optional[dict] = None, **kwargs
     ) -> list[Document]:
-        with open(file_path, "r") as f:
+        with open(file_path, "r", encoding="utf-8") as f:
             text = f.read()

         metadata = extra_info or {}
@@ -73,17 +73,25 @@ class BaseVectorStore(ABC):


 class LlamaIndexVectorStore(BaseVectorStore):
-    _li_class: type[LIVectorStore | BasePydanticVectorStore]
+    """Mixin for LlamaIndex based vectorstores"""
+
+    _li_class: type[LIVectorStore | BasePydanticVectorStore] | None
+
+    def _get_li_class(self):
+        raise NotImplementedError(
+            "Please return the relevant LlamaIndex class in in _get_li_class"
+        )

     def __init__(self, *args, **kwargs):
-        if self._li_class is None:
-            raise AttributeError(
-                "Require `_li_class` to set a VectorStore class from LlamarIndex"
-            )
+        # get li_class from the method if not set
+        if not self._li_class:
+            LIClass = self._get_li_class()
+        else:
+            LIClass = self._li_class

         from dataclasses import fields

-        self._client = self._li_class(*args, **kwargs)
+        self._client = LIClass(*args, **kwargs)

         self._vsq_kwargs = {_.name for _ in fields(VectorStoreQuery)}
         for key in ["query_embedding", "similarity_top_k", "node_ids"]:
@@ -97,6 +105,9 @@ class LlamaIndexVectorStore(BaseVectorStore):
         return setattr(self._client, name, value)

     def __getattr__(self, name: str) -> Any:
+        if name == "_li_class":
+            return super().__getattribute__(name)
+
         return getattr(self._client, name)

     def add(
@@ -1,7 +1,5 @@
 import os
-from typing import Any, Optional, Type, cast
+from typing import Any, Optional, cast

-from llama_index.vector_stores.milvus import MilvusVectorStore as LIMilvusVectorStore
-
 from kotaemon.base import DocumentWithEmbedding

@@ -9,7 +7,20 @@ from .base import LlamaIndexVectorStore


 class MilvusVectorStore(LlamaIndexVectorStore):
-    _li_class: Type[LIMilvusVectorStore] = LIMilvusVectorStore
+    _li_class = None
+
+    def _get_li_class(self):
+        try:
+            from llama_index.vector_stores.milvus import (
+                MilvusVectorStore as LIMilvusVectorStore,
+            )
+        except ImportError:
+            raise ImportError(
+                "Please install missing package: "
+                "'pip install llama-index-vector-stores-milvus'"
+            )
+
+        return LIMilvusVectorStore

     def __init__(
         self,
@@ -46,6 +57,10 @@ class MilvusVectorStore(LlamaIndexVectorStore):
             dim=dim,
             **self._kwargs,
         )
+        from llama_index.vector_stores.milvus import (
+            MilvusVectorStore as LIMilvusVectorStore,
+        )
+
         self._client = cast(LIMilvusVectorStore, self._client)
         self._inited = True

@@ -1,12 +1,23 @@
-from typing import Any, List, Optional, Type, cast
+from typing import Any, List, Optional, cast

-from llama_index.vector_stores.qdrant import QdrantVectorStore as LIQdrantVectorStore
-
 from .base import LlamaIndexVectorStore


 class QdrantVectorStore(LlamaIndexVectorStore):
-    _li_class: Type[LIQdrantVectorStore] = LIQdrantVectorStore
+    _li_class = None
+
+    def _get_li_class(self):
+        try:
+            from llama_index.vector_stores.qdrant import (
+                QdrantVectorStore as LIQdrantVectorStore,
+            )
+        except ImportError:
+            raise ImportError(
+                "Please install missing package: "
+                "'pip install llama-index-vector-stores-qdrant'"
+            )
+
+        return LIQdrantVectorStore

     def __init__(
         self,
@@ -29,6 +40,10 @@ class QdrantVectorStore(LlamaIndexVectorStore):
             client_kwargs=client_kwargs,
             **kwargs,
         )
+        from llama_index.vector_stores.qdrant import (
+            QdrantVectorStore as LIQdrantVectorStore,
+        )
+
         self._client = cast(LIQdrantVectorStore, self._client)

     def delete(self, ids: List[str], **kwargs):
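With the lazy `_get_li_class` pattern above, importing the vectorstore module no longer pulls in the optional backend; the ImportError (with its pip hint) only surfaces when a store is actually constructed. A hedged sketch, with the import path and constructor arguments assumed for illustration:

```python
# Import path and constructor kwargs are assumptions for illustration.
from kotaemon.storages import QdrantVectorStore

try:
    store = QdrantVectorStore(collection_name="demo", url="http://localhost:6333")
except ImportError as err:
    # Raised by _get_li_class() when llama-index-vector-stores-qdrant is not installed.
    print(err)
```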
@@ -30,16 +30,15 @@ dependencies = [
     "fastapi<=0.112.1",
     "gradio>=4.31.0,<4.40",
     "html2text==2024.2.26",
-    "langchain>=0.1.16,<0.2.0",
-    "langchain-anthropic",
-    "langchain-community>=0.0.34,<0.1.0",
+    "langchain>=0.1.16,<0.2.16",
+    "langchain-community>=0.0.34,<=0.2.11",
     "langchain-openai>=0.1.4,<0.2.0",
+    "langchain-anthropic",
+    "langchain-cohere>=0.2.4,<0.3.0",
     "llama-hub>=0.0.79,<0.1.0",
     "llama-index>=0.10.40,<0.11.0",
     "llama-index-vector-stores-chroma>=0.1.9",
     "llama-index-vector-stores-lancedb",
-    "llama-index-vector-stores-milvus",
-    "llama-index-vector-stores-qdrant",
     "openai>=1.23.6,<2",
     "openpyxl>=3.1.2,<3.2",
     "opentelemetry-exporter-otlp-proto-grpc>=1.25.0",  # https://github.com/chroma-core/chroma/issues/2571
@@ -75,6 +74,9 @@ adv = [
     "llama-cpp-python<0.2.8",
     "sentence-transformers",
     "wikipedia>=1.4.0,<1.5",
+    "llama-index>=0.10.40,<0.11.0",
+    "llama-index-vector-stores-milvus",
+    "llama-index-vector-stores-qdrant",
 ]
 dev = [
     "black",
@@ -135,7 +135,7 @@ def test_lchuggingface_embeddings(

 @skip_when_cohere_not_installed
 @patch(
-    "langchain.embeddings.cohere.CohereEmbeddings.embed_documents",
+    "langchain_cohere.CohereEmbeddings.embed_documents",
     side_effect=lambda *args, **kwargs: [[1.0, 2.1, 3.2]],
 )
 def test_lccohere_embeddings(langchain_cohere_embedding_call):
@@ -354,7 +354,7 @@ class EmbeddingManagement(BasePage):
             _ = emb("Hi")

             log_content += (
-                "<mark style='background: yellow; color: red'>- Connection success. "
+                "<mark style='background: green; color: white'>- Connection success. "
                 "</mark><br>"
             )
             yield log_content
@@ -285,7 +285,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
             ],
             retrieval_mode=user_settings["retrieval_mode"],
             llm_scorer=(LLMTrulensScoring() if use_llm_reranking else None),
-            rerankers=[CohereReranking()],
+            rerankers=[CohereReranking(use_key_from_ktem=True)],
         )
         if not user_settings["use_reranking"]:
             retriever.rerankers = []  # type: ignore
@@ -828,7 +828,6 @@ class FileIndexPage(BasePage):
             ]
         )

-        print(f"{len(results)=}, {len(file_list)=}")
         return results, file_list

     def interact_file_list(self, list_files, ev: gr.SelectData):
@@ -58,6 +58,7 @@ class LLMManager:
             AzureChatOpenAI,
             ChatOpenAI,
             LCAnthropicChat,
+            LCCohereChat,
             LCGeminiChat,
             LlamaCppChat,
         )
@@ -67,6 +68,7 @@ class LLMManager:
             AzureChatOpenAI,
             LCAnthropicChat,
             LCGeminiChat,
+            LCCohereChat,
             LlamaCppChat,
         ]

@@ -353,7 +353,7 @@ class LLMManagement(BasePage):
             respond = llm("Hi")

             log_content += (
-                f"<mark style='background: yellow; color: red'>- Connection success. "
+                f"<mark style='background: green; color: white'>- Connection success. "
                 f"Got response:\n {respond}</mark><br>"
             )
             yield log_content
@@ -1,9 +1,27 @@
 import gradio as gr
+from decouple import config
 from ktem.app import BaseApp
 from ktem.pages.chat import ChatPage
 from ktem.pages.help import HelpPage
 from ktem.pages.resources import ResourcesTab
 from ktem.pages.settings import SettingsPage
+from ktem.pages.setup import SetupPage
+from theflow.settings import settings as flowsettings
+
+KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
+KH_ENABLE_FIRST_SETUP = getattr(flowsettings, "KH_ENABLE_FIRST_SETUP", False)
+KH_APP_DATA_EXISTS = getattr(flowsettings, "KH_APP_DATA_EXISTS", True)
+
+# override first setup setting
+if config("KH_FIRST_SETUP", default=False, cast=bool):
+    KH_APP_DATA_EXISTS = False
+
+
+def toggle_first_setup_visibility():
+    global KH_APP_DATA_EXISTS
+    is_first_setup = KH_DEMO_MODE or not KH_APP_DATA_EXISTS
+    KH_APP_DATA_EXISTS = True
+    return gr.update(visible=is_first_setup), gr.update(visible=not is_first_setup)


 class App(BaseApp):
@@ -99,13 +117,17 @@ class App(BaseApp):
             ) as self._tabs["help-tab"]:
                 self.help_page = HelpPage(self)

+        if KH_ENABLE_FIRST_SETUP:
+            with gr.Column(visible=False) as self.setup_page_wrapper:
+                self.setup_page = SetupPage(self)
+
     def on_subscribe_public_events(self):
         if self.f_user_management:
             from ktem.db.engine import engine
             from ktem.db.models import User
             from sqlmodel import Session, select

-            def signed_in_out(user_id):
+            def toggle_login_visibility(user_id):
                 if not user_id:
                     return list(
                         (
@@ -146,7 +168,7 @@ class App(BaseApp):
             self.subscribe_event(
                 name="onSignIn",
                 definition={
-                    "fn": signed_in_out,
+                    "fn": toggle_login_visibility,
                     "inputs": [self.user_id],
                     "outputs": list(self._tabs.values()) + [self.tabs],
                     "show_progress": "hidden",
@@ -156,9 +178,30 @@ class App(BaseApp):
             self.subscribe_event(
                 name="onSignOut",
                 definition={
-                    "fn": signed_in_out,
+                    "fn": toggle_login_visibility,
                     "inputs": [self.user_id],
                     "outputs": list(self._tabs.values()) + [self.tabs],
                     "show_progress": "hidden",
                 },
             )

+        if KH_ENABLE_FIRST_SETUP:
+            self.subscribe_event(
+                name="onFirstSetupComplete",
+                definition={
+                    "fn": toggle_first_setup_visibility,
+                    "inputs": [],
+                    "outputs": [self.setup_page_wrapper, self.tabs],
+                    "show_progress": "hidden",
+                },
+            )
+
+    def _on_app_created(self):
+        """Called when the app is created"""
+
+        if KH_ENABLE_FIRST_SETUP:
+            self.app.load(
+                toggle_first_setup_visibility,
+                inputs=[],
+                outputs=[self.setup_page_wrapper, self.tabs],
+            )
@@ -883,7 +883,8 @@ class ChatPage(BasePage):

         # check if this is a newly created conversation
         if len(chat_history) == 1:
-            suggested_name = suggest_pipeline(chat_history).text[:40]
+            suggested_name = suggest_pipeline(chat_history).text
+            suggested_name = suggested_name.replace('"', "").replace("'", "")[:40]
             new_name = gr.update(value=suggested_name)
             renamed = True

@@ -11,8 +11,8 @@ class ChatPanel(BasePage):
         self.chatbot = gr.Chatbot(
             label=self._app.app_name,
             placeholder=(
-                "This is the beginning of a new conversation.\nMake sure to have added"
-                " a LLM by following the instructions in the Help tab."
+                "This is the beginning of a new conversation.\nIf you are new, "
+                "visit the Help tab for quick instructions."
             ),
             show_label=False,
             elem_id="main-chat-bot",
libs/ktem/ktem/pages/setup.py (new file, 347 lines)
@@ -0,0 +1,347 @@
+import json
+
+import gradio as gr
+import requests
+from ktem.app import BasePage
+from ktem.embeddings.manager import embedding_models_manager as embeddings
+from ktem.llms.manager import llms
+from theflow.settings import settings as flowsettings
+
+KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
+DEFAULT_OLLAMA_URL = "http://localhost:11434/api"
+
+
+DEMO_MESSAGE = (
+    "This is a public space. Please use the "
+    '"Duplicate Space" function on the top right '
+    "corner to setup your own space."
+)
+
+
+def pull_model(name: str, stream: bool = True):
+    payload = {"name": name}
+    headers = {"Content-Type": "application/json"}
+
+    response = requests.post(
+        DEFAULT_OLLAMA_URL + "/pull", json=payload, headers=headers, stream=stream
+    )
+
+    # Check if the request was successful
+    response.raise_for_status()
+
+    if stream:
+        for line in response.iter_lines():
+            if line:
+                data = json.loads(line.decode("utf-8"))
+                yield data
+                if data.get("status") == "success":
+                    break
+    else:
+        data = response.json()
+
+    return data
+
+
+class SetupPage(BasePage):
+
+    public_events = ["onFirstSetupComplete"]
+
+    def __init__(self, app):
+        self._app = app
+        self.on_building_ui()
+
+    def on_building_ui(self):
+        gr.Markdown(f"# Welcome to {self._app.app_name} first setup!")
+        self.radio_model = gr.Radio(
+            [
+                ("Cohere API (*free registration* available) - recommended", "cohere"),
+                ("OpenAI API (for more advance models)", "openai"),
+                ("Local LLM (for completely *private RAG*)", "ollama"),
+            ],
+            label="Select your model provider",
+            value="cohere",
+            info=(
+                "Note: You can change this later. "
+                "If you are not sure, go with the first option "
+                "which fits most normal users."
+            ),
+            interactive=True,
+        )
+
+        with gr.Column(visible=False) as self.openai_option:
+            gr.Markdown(
+                (
+                    "#### OpenAI API Key\n\n"
+                    "(create at https://platform.openai.com/api-keys)"
+                )
+            )
+            self.openai_api_key = gr.Textbox(
+                show_label=False, placeholder="OpenAI API Key"
+            )
+
+        with gr.Column(visible=True) as self.cohere_option:
+            gr.Markdown(
+                (
+                    "#### Cohere API Key\n\n"
+                    "(register your free API key "
+                    "at https://dashboard.cohere.com/api-keys)"
+                )
+            )
+            self.cohere_api_key = gr.Textbox(
+                show_label=False, placeholder="Cohere API Key"
+            )
+
+        with gr.Column(visible=False) as self.ollama_option:
+            gr.Markdown(
+                (
+                    "#### Setup Ollama\n\n"
+                    "Download and install Ollama from "
+                    "https://ollama.com/"
+                )
+            )
+
+        self.setup_log = gr.HTML(
+            show_label=False,
+        )
+
+        with gr.Row():
+            self.btn_finish = gr.Button("Proceed", variant="primary")
+            self.btn_skip = gr.Button(
+                "I am an advance user. Skip this.", variant="stop"
+            )
+
+    def on_register_events(self):
+        onFirstSetupComplete = gr.on(
+            triggers=[
+                self.btn_finish.click,
+                self.cohere_api_key.submit,
+                self.openai_api_key.submit,
+            ],
+            fn=self.update_model,
+            inputs=[self.cohere_api_key, self.openai_api_key, self.radio_model],
+            outputs=[self.setup_log],
+            show_progress="hidden",
+        )
+        if not KH_DEMO_MODE:
+            onSkipSetup = gr.on(
+                triggers=[self.btn_skip.click],
+                fn=lambda: None,
+                inputs=[],
+                show_progress="hidden",
+                outputs=[self.radio_model],
+            )
+
+            for event in self._app.get_event("onFirstSetupComplete"):
+                onSkipSetup = onSkipSetup.success(**event)
+
+        onFirstSetupComplete = onFirstSetupComplete.success(
+            fn=self.update_default_settings,
+            inputs=[self.radio_model, self._app.settings_state],
+            outputs=self._app.settings_state,
+        )
+        for event in self._app.get_event("onFirstSetupComplete"):
+            onFirstSetupComplete = onFirstSetupComplete.success(**event)
+
+        self.radio_model.change(
+            fn=self.switch_options_view,
+            inputs=[self.radio_model],
+            show_progress="hidden",
+            outputs=[self.cohere_option, self.openai_option, self.ollama_option],
+        )
+
+    def update_model(
+        self,
+        cohere_api_key,
+        openai_api_key,
+        radio_model_value,
+    ):
+        # skip if KH_DEMO_MODE
+        if KH_DEMO_MODE:
+            raise gr.Error(DEMO_MESSAGE)
+
+        log_content = ""
+        if not radio_model_value:
+            gr.Info("Skip setup models.")
+            yield gr.value(visible=False)
+            return
+
+        if radio_model_value == "cohere":
+            if cohere_api_key:
+                llms.update(
+                    name="cohere",
+                    spec={
+                        "__type__": "kotaemon.llms.chats.LCCohereChat",
+                        "model_name": "command-r-plus-08-2024",
+                        "api_key": cohere_api_key,
+                    },
+                    default=True,
+                )
+                embeddings.update(
+                    name="cohere",
+                    spec={
+                        "__type__": "kotaemon.embeddings.LCCohereEmbeddings",
+                        "model": "embed-multilingual-v2.0",
+                        "cohere_api_key": cohere_api_key,
+                        "user_agent": "default",
+                    },
+                    default=True,
+                )
+        elif radio_model_value == "openai":
+            if openai_api_key:
+                llms.update(
+                    name="openai",
+                    spec={
+                        "__type__": "kotaemon.llms.ChatOpenAI",
+                        "base_url": "https://api.openai.com/v1",
+                        "model": "gpt-4o",
+                        "api_key": openai_api_key,
+                        "timeout": 20,
+                    },
+                    default=True,
+                )
+                embeddings.update(
+                    name="openai",
+                    spec={
+                        "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
+                        "base_url": "https://api.openai.com/v1",
+                        "model": "text-embedding-3-large",
+                        "api_key": openai_api_key,
+                        "timeout": 10,
+                        "context_length": 8191,
+                    },
+                    default=True,
+                )
+        elif radio_model_value == "ollama":
+            llms.update(
+                name="ollama",
+                spec={
+                    "__type__": "kotaemon.llms.ChatOpenAI",
+                    "base_url": "http://localhost:11434/v1/",
+                    "model": "llama3.1:8b",
+                    "api_key": "ollama",
+                },
+                default=True,
+            )
+            embeddings.update(
+                name="ollama",
+                spec={
+                    "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
+                    "base_url": "http://localhost:11434/v1/",
+                    "model": "nomic-embed-text",
+                    "api_key": "ollama",
+                },
+                default=True,
+            )
+
+            # download required models through ollama
+            llm_model_name = llms.get("ollama").model  # type: ignore
+            emb_model_name = embeddings.get("ollama").model  # type: ignore
+
+            try:
+                for model_name in [emb_model_name, llm_model_name]:
+                    log_content += f"- Downloading model `{model_name}` from Ollama<br>"
+                    yield log_content
+
+                    pre_download_log = log_content
+
+                    for response in pull_model(model_name):
+                        complete = response.get("completed", 0)
+                        total = response.get("total", 0)
+                        if complete > 0 and total > 0:
+                            ratio = int(complete / total * 100)
+                            log_content = (
+                                pre_download_log
+                                + f"- {response.get('status')}: {ratio}%<br>"
+                            )
+                        else:
+                            if "pulling" not in response.get("status", ""):
+                                log_content += f"- {response.get('status')}<br>"
+
+                        yield log_content
+            except Exception as e:
+                log_content += (
+                    "Make sure you have download and installed Ollama correctly."
+                    f"Got error: {str(e)}"
+                )
+                yield log_content
+                raise gr.Error("Failed to download model from Ollama.")
+
+        # test models connection
+        llm_output = emb_output = None
+
+        # LLM model
+        log_content += f"- Testing LLM model: {radio_model_value}<br>"
+        yield log_content
+
+        llm = llms.get(radio_model_value)  # type: ignore
+        log_content += "- Sending a message `Hi`<br>"
+        yield log_content
+        try:
+            llm_output = llm("Hi")
+        except Exception as e:
+            log_content += (
+                f"<mark style='color: yellow; background: red'>- Connection failed. "
+                f"Got error:\n {str(e)}</mark>"
+            )
+
+        if llm_output:
+            log_content += (
+                "<mark style='background: green; color: white'>- Connection success. "
+                "</mark><br>"
+            )
+        yield log_content
+
+        if llm_output:
+            # embedding model
+            log_content += f"- Testing Embedding model: {radio_model_value}<br>"
+            yield log_content
+
+            emb = embeddings.get(radio_model_value)
+            assert emb, f"Embedding model {radio_model_value} not found."
+
+            log_content += "- Sending a message `Hi`<br>"
+            yield log_content
+            try:
+                emb_output = emb("Hi")
+            except Exception as e:
+                log_content += (
+                    f"<mark style='color: yellow; background: red'>"
+                    "- Connection failed. "
+                    f"Got error:\n {str(e)}</mark>"
+                )
+
+            if emb_output:
+                log_content += (
+                    "<mark style='background: green; color: white'>"
+                    "- Connection success. "
+                    "</mark><br>"
+                )
+            yield log_content
+
+        if llm_output and emb_output:
+            gr.Info("Setup models completed successfully!")
+        else:
+            raise gr.Error(
+                "Setup models failed. Please verify your connection and API key."
+            )
+
+    def update_default_settings(self, radio_model_value, default_settings):
+        # revise default settings
+        # reranking llm
+        default_settings["index.options.1.reranking_llm"] = radio_model_value
+        if radio_model_value == "ollama":
+            default_settings["index.options.1.use_llm_reranking"] = False
+
+        return default_settings
+
+    def switch_options_view(self, radio_model_value):
+        components_visible = [gr.update(visible=False) for _ in range(3)]
+
+        values = ["cohere", "openai", "ollama", None]
+        assert radio_model_value in values, f"Invalid value {radio_model_value}"
+
+        if radio_model_value is not None:
+            idx = values.index(radio_model_value)
+            components_visible[idx] = gr.update(visible=True)
+
+        return components_visible
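A hedged sketch of driving `pull_model` from the new setup page above; it assumes a local Ollama server is listening on the default port used by `DEFAULT_OLLAMA_URL`.

```python
# Assumes Ollama is running locally on the default port.
for status in pull_model("nomic-embed-text"):
    done, total = status.get("completed", 0), status.get("total", 0)
    if total:
        print(f"{status.get('status')}: {done / total:.0%}")
    else:
        print(status.get("status"))
```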
@@ -52,6 +52,7 @@ class DecomposeQuestionPipeline(RewriteQuestionPipeline):
         llm_kwargs = {
             "tools": [{"type": "function", "function": function}],
             "tool_choice": "auto",
+            "tools_pydantic": [SubQuery],
         }

         messages = [
@@ -7,6 +7,7 @@ DEFAULT_REWRITE_PROMPT = (
     "Given the following question, rephrase and expand it "
     "to help you do better answering. Maintain all information "
     "in the original question. Keep the question as concise as possible. "
+    "Only output the rephrased question without additional information. "
     "Give answer in {lang}\n"
     "Original question: {question}\n"
     "Rephrased question: "
@@ -39,10 +39,13 @@ EVIDENCE_MODE_TABLE = 1
 EVIDENCE_MODE_CHATBOT = 2
 EVIDENCE_MODE_FIGURE = 3
 MAX_IMAGES = 10
+CITATION_TIMEOUT = 5.0


 def find_text(search_span, context):
     sentence_list = search_span.split("\n")
+    context = context.replace("\n", " ")
+
     matches = []
     # don't search for small text
     if len(search_span) > 5:
@@ -50,7 +53,7 @@ def find_text(search_span, context):
         match = SequenceMatcher(
             None, sentence, context, autojunk=False
         ).find_longest_match()
-        if match.size > len(sentence) * 0.35:
+        if match.size > max(len(sentence) * 0.35, 5):
             matches.append((match.b, match.b + match.size))

     return matches
@@ -200,15 +203,6 @@ DEFAULT_QA_FIGURE_PROMPT = (
     "Answer: "
 )  # noqa

-DEFAULT_REWRITE_PROMPT = (
-    "Given the following question, rephrase and expand it "
-    "to help you do better answering. Maintain all information "
-    "in the original question. Keep the question as concise as possible. "
-    "Give answer in {lang}\n"
-    "Original question: {question}\n"
-    "Rephrased question: "
-)  # noqa
-
 CONTEXT_RELEVANT_WARNING_SCORE = 0.7


@@ -391,7 +385,8 @@ class AnswerWithContextPipeline(BaseComponent):
         qa_score = None

         if citation_thread:
-            citation_thread.join()
+            citation_thread.join(timeout=CITATION_TIMEOUT)
+
         answer = Document(
             text=output,
             metadata={"citation": citation, "qa_score": qa_score},
@@ -525,9 +520,9 @@ class FullQAPipeline(BaseReasoning):
         spans = defaultdict(list)
         has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs)

-        if answer.metadata["citation"] and answer.metadata["citation"].answer:
-            for fact_with_evidence in answer.metadata["citation"].answer:
-                for quote in fact_with_evidence.substring_quote:
+        if answer.metadata["citation"]:
+            evidences = answer.metadata["citation"].evidences
+            for quote in evidences:
                 matched_excerpts = []
                 for doc in docs:
                     matches = find_text(quote, doc.text)
@@ -542,7 +537,7 @@ class FullQAPipeline(BaseReasoning):
                         )
                         matched_excerpts.append(doc.text[start:end])

-                print("Matched citation:", quote, matched_excerpts),
+                # print("Matched citation:", quote, matched_excerpts),

         id2docs = {doc.doc_id: doc for doc in docs}
         not_detected = set(id2docs.keys()) - set(spans.keys())
@@ -75,7 +75,6 @@ class Render:
         if not highlight_text:
             try:
                 lang = detect(text.replace("\n", " "))["lang"]
-                print("lang", lang)
                 if lang not in ["ja", "cn"]:
                     highlight_words = [
                         t[:-1] if t.endswith("-") else t for t in text.split("\n")
@@ -83,10 +82,13 @@ class Render:
                     highlight_text = highlight_words[0]
                     phrase = "true"
                 else:
-                    highlight_text = text.replace("\n", "")
                     phrase = "false"

-                print("highlight_text", highlight_text, phrase)
+                highlight_text = (
+                    text.replace("\n", "").replace('"', "").replace("'", "")
+                )
+
+                # print("highlight_text", highlight_text, phrase, lang)
             except Exception as e:
                 print(e)
                 highlight_text = text
@@ -162,8 +164,15 @@ class Render:
         if item_type_prefix:
             item_type_prefix += " from "

+        if llm_reranking_score > 0:
+            relevant_score = llm_reranking_score
+        elif cohere_reranking_score > 0:
+            relevant_score = cohere_reranking_score
+        else:
+            relevant_score = 0.0
+
         rendered_score = Render.collapsible(
-            header=f"<b> Relevance score</b>: {llm_reranking_score}",
+            header=f"<b> Relevance score</b>: {relevant_score:.1f}",
             content="<b>  Vectorstore score:</b>"
             f" {vectorstore_score}"
             f"{text_search_str}"