improve llm selection for simple reasoning pipeline

2024-03-28 16:35:13 +07:00
parent b2089245f2
commit f9cc40ca25
5 changed files with 44 additions and 13 deletions
--- a/libs/kotaemon/kotaemon/contribs/promptui/tunnel.py
+++ b/libs/kotaemon/kotaemon/contribs/promptui/tunnel.py
@@ -17,8 +17,7 @@ if machine == "x86_64":
 BINARY_REMOTE_NAME = f"frpc_{platform.system().lower()}_{machine.lower()}"
 EXTENSION = ".exe" if os.name == "nt" else ""
 BINARY_URL = (
-    "some-endpoint.com"
-    f"/kotaemon/tunneling/{VERSION}/{BINARY_REMOTE_NAME}{EXTENSION}"
+    "some-endpoint.com" f"/kotaemon/tunneling/{VERSION}/{BINARY_REMOTE_NAME}{EXTENSION}"
 )

 BINARY_FILENAME = f"{BINARY_REMOTE_NAME}_v{VERSION}"
--- a/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
+++ b/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
@@ -194,7 +194,6 @@ class ChatOpenAI(LCChatMixin, ChatLLM):  # type: ignore


 class AzureChatOpenAI(LCChatMixin, ChatLLM):  # type: ignore
-
    def __init__(
        self,
        azure_endpoint: str | None = None,
--- a/libs/ktem/ktem/components.py
+++ b/libs/ktem/ktem/components.py
@@ -1,4 +1,5 @@
 """Common components, some kind of config"""
+
 import logging
 from functools import cache
 from pathlib import Path
@@ -71,7 +72,7 @@ class ModelPool:
        }

    def options(self) -> dict:
-        """Present a list of models"""
+        """Present a dict of models"""
        return self._models

    def get_random_name(self) -> str:
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import logging
 import shutil
 import warnings
 from collections import defaultdict
@@ -8,7 +9,7 @@ from hashlib import sha256
 from pathlib import Path
 from typing import Optional

-from ktem.components import embeddings, filestorage_path, llms
+from ktem.components import embeddings, filestorage_path
 from ktem.db.models import engine
 from llama_index.vector_stores import (
    FilterCondition,
@@ -25,10 +26,12 @@ from theflow.utils.modules import import_dotted_string
 from kotaemon.base import RetrievedDocument
 from kotaemon.indices import VectorIndexing, VectorRetrieval
 from kotaemon.indices.ingests import DocumentIngestor
-from kotaemon.indices.rankings import BaseReranking, LLMReranking
+from kotaemon.indices.rankings import BaseReranking

 from .base import BaseFileIndexIndexing, BaseFileIndexRetriever

+logger = logging.getLogger(__name__)
+

@lru_cache
 def dev_settings():
@@ -67,7 +70,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
    vector_retrieval: VectorRetrieval = VectorRetrieval.withx(
        embedding=embeddings.get_default(),
    )
-    reranker: BaseReranking = LLMReranking.withx(llm=llms.get_lowest_cost())
+    reranker: BaseReranking
    get_extra_table: bool = False

    def run(
@@ -153,7 +156,23 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):

    @classmethod
    def get_user_settings(cls) -> dict:
+        from ktem.components import llms
+
+        try:
+            reranking_llm = llms.get_lowest_cost_name()
+            reranking_llm_choices = list(llms.options().keys())
+        except Exception as e:
+            logger.error(e)
+            reranking_llm = None
+            reranking_llm_choices = []
+
        return {
+            "reranking_llm": {
+                "name": "LLM for reranking",
+                "value": reranking_llm,
+                "component": "dropdown",
+                "choices": reranking_llm_choices,
+            },
            "separate_embedding": {
                "name": "Use separate embedding",
                "value": False,
@@ -185,7 +204,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
            },
            "use_reranking": {
                "name": "Use reranking",
-                "value": True,
+                "value": False,
                "choices": [True, False],
                "component": "checkbox",
            },
@@ -199,7 +218,10 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
            settings: the settings of the app
            kwargs: other arguments
        """
-        retriever = cls(get_extra_table=user_settings["prioritize_table"])
+        retriever = cls(
+            get_extra_table=user_settings["prioritize_table"],
+            reranker=user_settings["reranking_llm"],
+        )
        if not user_settings["use_reranking"]:
            retriever.reranker = None  # type: ignore

--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -159,6 +159,7 @@ class AnswerWithContextPipeline(BaseComponent):
    qa_table_template: str = DEFAULT_QA_TABLE_PROMPT
    qa_chatbot_template: str = DEFAULT_QA_CHATBOT_PROMPT

+    enable_citation: bool = False
    system_prompt: str = ""
    lang: str = "English"  # support English and Japanese

@@ -200,7 +201,8 @@ class AnswerWithContextPipeline(BaseComponent):
            lang=self.lang,
        )

-        if evidence:
+        citation_task = None
+        if evidence and self.enable_citation:
            citation_task = asyncio.create_task(
                self.citation_pipeline.ainvoke(context=evidence, question=question)
            )
@@ -226,7 +228,7 @@ class AnswerWithContextPipeline(BaseComponent):

        # retrieve the citation
        print("Waiting for citation task")
-        if evidence:
+        if citation_task is not None:
            citation = await citation_task
        else:
            citation = None
@@ -353,7 +355,15 @@ class FullQAPipeline(BaseReasoning):
        _id = cls.get_info()["id"]

        pipeline = FullQAPipeline(retrievers=retrievers)
-        pipeline.answering_pipeline.llm = llms.get_highest_accuracy()
+        pipeline.answering_pipeline.llm = llms[
+            settings[f"reasoning.options.{_id}.main_llm"]
+        ]
+        pipeline.answering_pipeline.citation_pipeline.llm = llms[
+            settings[f"reasoning.options.{_id}.citation_llm"]
+        ]
+        pipeline.answering_pipeline.enable_citation = settings[
+            f"reasoning.options.{_id}.highlight_citation"
+        ]
        pipeline.answering_pipeline.lang = {"en": "English", "ja": "Japanese"}.get(
            settings["reasoning.lang"], "English"
        )
@@ -384,7 +394,7 @@ class FullQAPipeline(BaseReasoning):
        return {
            "highlight_citation": {
                "name": "Highlight Citation",
-                "value": True,
+                "value": False,
                "component": "checkbox",
            },
            "citation_llm": {