feat: add inline citation style (#523) bump:minor
* feat: add URL quick index, export mindmap, refine UI & animation
* fix: inject multimodal mode from env var
* fix: minor update css
* feat: add citation inline mode
* fix: minor update citation inline pipeline
* feat: add citation quick setting
* fix: minor update
* fix: minor update
parent 013f6f4103
commit 7e34e4343b
@ -255,7 +255,7 @@ KH_REASONINGS = [
    "ktem.reasoning.react.ReactAgentPipeline",
    "ktem.reasoning.rewoo.RewooAgentPipeline",
]
KH_REASONINGS_USE_MULTIMODAL = False
KH_REASONINGS_USE_MULTIMODAL = config("USE_MULTIMODAL", default=False, cast=bool)
KH_VLM_ENDPOINT = "{0}/openai/deployments/{1}/chat/completions?api-version={2}".format(
    config("AZURE_OPENAI_ENDPOINT", default=""),
    config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"),
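Note: the multimodal switch is now read from the environment instead of being hard-coded. Setting USE_MULTIMODAL=true before launch (or in the .env file; decouple-style parsing is assumed here, so cast=bool accepts common truthy strings such as "true" or "1") turns KH_REASONINGS_USE_MULTIMODAL on.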
@ -1,7 +1,5 @@
from .citation import CitationPipeline
from .text_based import CitationQAPipeline

__all__ = [
    "CitationPipeline",
    "CitationQAPipeline",
]
libs/kotaemon/kotaemon/indices/qa/citation_qa.py (new file, 390 lines)
@ -0,0 +1,390 @@
import threading
from collections import defaultdict
from typing import Generator

import numpy as np
from theflow.settings import settings as flowsettings

from kotaemon.base import (
    AIMessage,
    BaseComponent,
    Document,
    HumanMessage,
    Node,
    SystemMessage,
)
from kotaemon.llms import ChatLLM, PromptTemplate

from .citation import CitationPipeline
from .format_context import (
    EVIDENCE_MODE_FIGURE,
    EVIDENCE_MODE_TABLE,
    EVIDENCE_MODE_TEXT,
)
from .utils import find_text

try:
    from ktem.llms.manager import llms
    from ktem.reasoning.prompt_optimization.mindmap import CreateMindmapPipeline
    from ktem.utils.render import Render
except ImportError:
    raise ImportError("Please install `ktem` to use this component")

MAX_IMAGES = 10
CITATION_TIMEOUT = 5.0
CONTEXT_RELEVANT_WARNING_SCORE = 0.7

DEFAULT_QA_TEXT_PROMPT = (
    "Use the following pieces of context to answer the question at the end in detail with clear explanation. "  # noqa: E501
    "If you don't know the answer, just say that you don't know, don't try to "
    "make up an answer. Give answer in "
    "{lang}.\n\n"
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)

DEFAULT_QA_TABLE_PROMPT = (
    "Use the given context: texts, tables, and figures below to answer the question, "
    "then provide answer with clear explanation."
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer. Give answer in {lang}.\n\n"
    "Context:\n"
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)  # noqa

DEFAULT_QA_CHATBOT_PROMPT = (
    "Pick the most suitable chatbot scenarios to answer the question at the end, "
    "output the provided answer text. If you don't know the answer, "
    "just say that you don't know. Keep the answer as concise as possible. "
    "Give answer in {lang}.\n\n"
    "Context:\n"
    "{context}\n"
    "Question: {question}\n"
    "Answer:"
)  # noqa

DEFAULT_QA_FIGURE_PROMPT = (
    "Use the given context: texts, tables, and figures below to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Give answer in {lang}.\n\n"
    "Context: \n"
    "{context}\n"
    "Question: {question}\n"
    "Answer: "
)  # noqa


class AnswerWithContextPipeline(BaseComponent):
    """Answer the question based on the evidence

    Args:
        llm: the language model to generate the answer
        citation_pipeline: generates citation from the evidence
        qa_template: the prompt template for LLM to generate answer (refer to
            evidence_mode)
        qa_table_template: the prompt template for LLM to generate answer for table
            (refer to evidence_mode)
        qa_chatbot_template: the prompt template for LLM to generate answer for
            pre-made scenarios (refer to evidence_mode)
        lang: the language of the answer. Currently supports English and Japanese
    """

    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
    vlm_endpoint: str = getattr(flowsettings, "KH_VLM_ENDPOINT", "")
    use_multimodal: bool = getattr(flowsettings, "KH_REASONINGS_USE_MULTIMODAL", True)
    citation_pipeline: CitationPipeline = Node(
        default_callback=lambda _: CitationPipeline(llm=llms.get_default())
    )
    create_mindmap_pipeline: CreateMindmapPipeline = Node(
        default_callback=lambda _: CreateMindmapPipeline(llm=llms.get_default())
    )

    qa_template: str = DEFAULT_QA_TEXT_PROMPT
    qa_table_template: str = DEFAULT_QA_TABLE_PROMPT
    qa_chatbot_template: str = DEFAULT_QA_CHATBOT_PROMPT
    qa_figure_template: str = DEFAULT_QA_FIGURE_PROMPT

    enable_citation: bool = False
    enable_mindmap: bool = False
    enable_citation_viz: bool = False

    system_prompt: str = ""
    lang: str = "English"  # support English and Japanese
    n_last_interactions: int = 5

    def get_prompt(self, question, evidence, evidence_mode: int):
        """Prepare the prompt and other information for LLM"""
        if evidence_mode == EVIDENCE_MODE_TEXT:
            prompt_template = PromptTemplate(self.qa_template)
        elif evidence_mode == EVIDENCE_MODE_TABLE:
            prompt_template = PromptTemplate(self.qa_table_template)
        elif evidence_mode == EVIDENCE_MODE_FIGURE:
            if self.use_multimodal:
                prompt_template = PromptTemplate(self.qa_figure_template)
            else:
                prompt_template = PromptTemplate(self.qa_template)
        else:
            prompt_template = PromptTemplate(self.qa_chatbot_template)

        prompt = prompt_template.populate(
            context=evidence,
            question=question,
            lang=self.lang,
        )

        return prompt, evidence

    def run(
        self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
    ) -> Document:
        return self.invoke(question, evidence, evidence_mode, **kwargs)

    def invoke(
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Document:
        raise NotImplementedError

    async def ainvoke(  # type: ignore
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Document:
        """Answer the question based on the evidence

        In addition to the question and the evidence, this method also takes
        evidence_mode into account. The evidence_mode tells which kind of evidence
        is provided. The kind of evidence affects:
        1. How the evidence is represented.
        2. The prompt to generate the answer.

        By default, the evidence_mode is 0, which means the evidence is plain text with
        no particular semantic representation. The evidence_mode can be:
        1. "table": There will be HTML markup telling that there is a table
            within the evidence.
        2. "chatbot": There will be HTML markup telling that there is a chatbot.
            This chatbot is a scenario, extracted from an Excel file, where each
            row corresponds to an interaction.

        Args:
            question: the original question posed by user
            evidence: the text that contains relevant information to answer the question
                (determined by retrieval pipeline)
            evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot
        """
        raise NotImplementedError

    def stream(  # type: ignore
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Generator[Document, None, Document]:
        history = kwargs.get("history", [])
        print(f"Got {len(images)} images")
        # check if evidence exists, use QA prompt
        if evidence:
            prompt, evidence = self.get_prompt(question, evidence, evidence_mode)
        else:
            prompt = question

        # retrieve the citation
        citation = None
        mindmap = None

        def citation_call():
            nonlocal citation
            citation = self.citation_pipeline(context=evidence, question=question)

        def mindmap_call():
            nonlocal mindmap
            mindmap = self.create_mindmap_pipeline(context=evidence, question=question)

        citation_thread = None
        mindmap_thread = None

        # execute function call in thread
        if evidence:
            if self.enable_citation:
                citation_thread = threading.Thread(target=citation_call)
                citation_thread.start()

            if self.enable_mindmap:
                mindmap_thread = threading.Thread(target=mindmap_call)
                mindmap_thread.start()

        output = ""
        logprobs = []

        messages = []
        if self.system_prompt:
            messages.append(SystemMessage(content=self.system_prompt))

        for human, ai in history[-self.n_last_interactions :]:
            messages.append(HumanMessage(content=human))
            messages.append(AIMessage(content=ai))

        if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:
            # create image message:
            messages.append(
                HumanMessage(
                    content=[
                        {"type": "text", "text": prompt},
                    ]
                    + [
                        {
                            "type": "image_url",
                            "image_url": {"url": image},
                        }
                        for image in images[:MAX_IMAGES]
                    ],
                )
            )
        else:
            # append main prompt
            messages.append(HumanMessage(content=prompt))

        try:
            # try streaming first
            print("Trying LLM streaming")
            for out_msg in self.llm.stream(messages):
                output += out_msg.text
                logprobs += out_msg.logprobs
                yield Document(channel="chat", content=out_msg.text)
        except NotImplementedError:
            print("Streaming is not supported, falling back to normal processing")
            output = self.llm(messages).text
            yield Document(channel="chat", content=output)

        if logprobs:
            qa_score = np.exp(np.average(logprobs))
        else:
            qa_score = None

        if citation_thread:
            citation_thread.join(timeout=CITATION_TIMEOUT)
        if mindmap_thread:
            mindmap_thread.join(timeout=CITATION_TIMEOUT)

        answer = Document(
            text=output,
            metadata={
                "citation_viz": self.enable_citation_viz,
                "mindmap": mindmap,
                "citation": citation,
                "qa_score": qa_score,
            },
        )

        return answer

    def match_evidence_with_context(self, answer, docs) -> dict[str, list[dict]]:
        """Match the evidence with the context"""
        spans: dict[str, list[dict]] = defaultdict(list)

        if not answer.metadata["citation"]:
            return spans

        evidences = answer.metadata["citation"].evidences
        for quote in evidences:
            matched_excerpts = []
            for doc in docs:
                matches = find_text(quote, doc.text)

                for start, end in matches:
                    if "|" not in doc.text[start:end]:
                        spans[doc.doc_id].append(
                            {
                                "start": start,
                                "end": end,
                            }
                        )
                        matched_excerpts.append(doc.text[start:end])

            # print("Matched citation:", quote, matched_excerpts),
        return spans

    def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document]]:
        """Prepare the citations to show on the UI"""
        with_citation, without_citation = [], []
        has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs)

        spans = self.match_evidence_with_context(answer, docs)
        id2docs = {doc.doc_id: doc for doc in docs}
        not_detected = set(id2docs.keys()) - set(spans.keys())

        # render highlight spans
        for _id, ss in spans.items():
            if not ss:
                not_detected.add(_id)
                continue
            cur_doc = id2docs[_id]
            highlight_text = ""

            ss = sorted(ss, key=lambda x: x["start"])
            text = cur_doc.text[: ss[0]["start"]]
            for idx, span in enumerate(ss):
                to_highlight = cur_doc.text[span["start"] : span["end"]]
                if len(to_highlight) > len(highlight_text):
                    highlight_text = to_highlight

                span_idx = span.get("idx", None)
                if span_idx is not None:
                    to_highlight = f"【{span_idx + 1}】" + to_highlight

                text += Render.highlight(
                    to_highlight,
                    elem_id=str(span_idx + 1) if span_idx is not None else None,
                )
                print(text)
                if idx < len(ss) - 1:
                    text += cur_doc.text[span["end"] : ss[idx + 1]["start"]]
            text += cur_doc.text[ss[-1]["end"] :]
            # add to display list
            with_citation.append(
                Document(
                    channel="info",
                    content=Render.collapsible_with_header_score(
                        cur_doc,
                        override_text=text,
                        highlight_text=highlight_text,
                        open_collapsible=True,
                    ),
                )
            )

        print("Got {} cited docs".format(len(with_citation)))

        sorted_not_detected_items_with_scores = [
            (id_, id2docs[id_].metadata.get("llm_trulens_score", 0.0))
            for id_ in not_detected
        ]
        sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True)

        for id_, _ in sorted_not_detected_items_with_scores:
            doc = id2docs[id_]
            doc_score = doc.metadata.get("llm_trulens_score", 0.0)
            is_open = not has_llm_score or (
                doc_score > CONTEXT_RELEVANT_WARNING_SCORE and len(with_citation) == 0
            )
            without_citation.append(
                Document(
                    channel="info",
                    content=Render.collapsible_with_header_score(
                        doc, open_collapsible=is_open
                    ),
                )
            )
        return with_citation, without_citation
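For reference, a minimal usage sketch of the pipeline above (not part of the diff; it assumes ktem is installed and a default LLM is configured, and the question/evidence strings are illustrative):

from kotaemon.indices.qa.citation_qa import AnswerWithContextPipeline

evidence_text = (
    "<br><b>Content from report.pdf (Page 3): </b> "
    "Fixed-size chunking splits documents into equal-size chunks. \n<br>"
)
qa = AnswerWithContextPipeline(enable_citation=True, lang="English")
# stream() yields chat chunks on channel="chat"; its generator return value is the
# final Document whose metadata carries "citation", "mindmap" and "qa_score"
for chunk in qa.stream("What is fixed-size chunking?", evidence_text):
    print(chunk.content, end="")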
libs/kotaemon/kotaemon/indices/qa/citation_qa_inline.py (new file, 267 lines)
@ -0,0 +1,267 @@
import re
import threading
from collections import defaultdict
from typing import Generator

import numpy as np

from kotaemon.base import AIMessage, Document, HumanMessage, SystemMessage
from kotaemon.llms import PromptTemplate

from .citation import CiteEvidence
from .citation_qa import CITATION_TIMEOUT, MAX_IMAGES, AnswerWithContextPipeline
from .format_context import EVIDENCE_MODE_FIGURE
from .utils import find_start_end_phrase

DEFAULT_QA_CITATION_PROMPT = """
Use the following pieces of context to answer the question at the end.
Provide DETAILED answer with clear explanation.
Format answer with easy to follow bullets / paragraphs.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use the same language as the question to respond.

CONTEXT:
----
{context}
----

Answer using this format:
CITATION LIST

// the index in this array
CITATION【number】

// output 2 phrase to mark start and end of the relevant span
// each has ~ 6 words
// MUST COPY EXACTLY from the CONTEXT
// NO CHANGE or REPHRASE
// RELEVANT_SPAN_FROM_CONTEXT
START_PHRASE: string
END_PHRASE: string

// When you answer, ensure to add citations from the documents
// in the CONTEXT with a number that corresponds to the answersInText array.
// (in the form [number])
// Try to include the number after each facts / statements you make.
// You can create as many citations as you need.
FINAL ANSWER
string

STRICTLY FOLLOW THIS EXAMPLE:
CITATION LIST

CITATION【1】

START_PHRASE: Known as fixed-size chunking , the traditional
END_PHRASE: not degrade the final retrieval performance.

CITATION【2】

START_PHRASE: Fixed-size Chunker This is our baseline chunker
END_PHRASE: this shows good retrieval quality.

FINAL ANSWER
An alternative to semantic chunking is fixed-size chunking. This traditional method involves splitting documents into chunks of a predetermined or user-specified size, regardless of semantic content, which is computationally efficient【1】. However, it may result in the fragmentation of semantically related content, thereby potentially degrading retrieval performance【2】.

QUESTION: {question}\n
ANSWER:
"""  # noqa


class AnswerWithInlineCitation(AnswerWithContextPipeline):
    """Answer the question based on the evidence with inline citation"""

    qa_citation_template: str = DEFAULT_QA_CITATION_PROMPT

    def get_prompt(self, question, evidence, evidence_mode: int):
        """Prepare the prompt and other information for LLM"""
        prompt_template = PromptTemplate(self.qa_citation_template)

        prompt = prompt_template.populate(
            context=evidence,
            question=question,
            safe=False,
        )

        return prompt, evidence

    def answer_to_citations(self, answer):
        evidences = []
        lines = answer.split("\n")
        for line in lines:
            for keyword in ["START_PHRASE:", "END_PHRASE:"]:
                if line.startswith(keyword):
                    evidences.append(line[len(keyword) :].strip())

        return CiteEvidence(evidences=evidences)

    def replace_citation_with_link(self, answer: str):
        # Define the regex pattern to match 【number】
        pattern = r"【\d+】"
        matches = re.finditer(pattern, answer)

        matched_citations = set()
        for match in matches:
            citation = match.group()
            matched_citations.add(citation)

        for citation in matched_citations:
            print("Found citation:", citation)
            answer = answer.replace(
                citation,
                (
                    "<a href='#' class='citation' "
                    f"id='mark-{citation[1:-1]}'>{citation}</a>"
                ),
            )

        print("Replaced answer:", answer)
        return answer

    def stream(  # type: ignore
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Generator[Document, None, Document]:
        history = kwargs.get("history", [])
        print(f"Got {len(images)} images")
        # check if evidence exists, use QA prompt
        if evidence:
            prompt, evidence = self.get_prompt(question, evidence, evidence_mode)
        else:
            prompt = question

        output = ""
        logprobs = []

        citation = None
        mindmap = None

        def mindmap_call():
            nonlocal mindmap
            mindmap = self.create_mindmap_pipeline(context=evidence, question=question)

        mindmap_thread = None

        # execute function call in thread
        if evidence:
            if self.enable_mindmap:
                mindmap_thread = threading.Thread(target=mindmap_call)
                mindmap_thread.start()

        messages = []
        if self.system_prompt:
            messages.append(SystemMessage(content=self.system_prompt))

        for human, ai in history[-self.n_last_interactions :]:
            messages.append(HumanMessage(content=human))
            messages.append(AIMessage(content=ai))

        if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:
            # create image message:
            messages.append(
                HumanMessage(
                    content=[
                        {"type": "text", "text": prompt},
                    ]
                    + [
                        {
                            "type": "image_url",
                            "image_url": {"url": image},
                        }
                        for image in images[:MAX_IMAGES]
                    ],
                )
            )
        else:
            # append main prompt
            messages.append(HumanMessage(content=prompt))

        START_ANSWER = "FINAL ANSWER"
        start_of_answer = True
        final_answer = ""

        try:
            # try streaming first
            print("Trying LLM streaming")
            for out_msg in self.llm.stream(messages):
                if START_ANSWER in output:
                    final_answer += (
                        out_msg.text.lstrip() if start_of_answer else out_msg.text
                    )
                    start_of_answer = False
                    yield Document(channel="chat", content=out_msg.text)

                output += out_msg.text
                logprobs += out_msg.logprobs
        except NotImplementedError:
            print("Streaming is not supported, falling back to normal processing")
            output = self.llm(messages).text
            yield Document(channel="chat", content=output)

        if logprobs:
            qa_score = np.exp(np.average(logprobs))
        else:
            qa_score = None

        citation = self.answer_to_citations(output)

        if mindmap_thread:
            mindmap_thread.join(timeout=CITATION_TIMEOUT)

        # convert citation to link
        answer = Document(
            text=final_answer,
            metadata={
                "citation_viz": self.enable_citation_viz,
                "mindmap": mindmap,
                "citation": citation,
                "qa_score": qa_score,
            },
        )

        # yield the final answer
        final_answer = self.replace_citation_with_link(final_answer)
        yield Document(channel="chat", content=None)
        yield Document(channel="chat", content=final_answer)

        return answer

    def match_evidence_with_context(self, answer, docs) -> dict[str, list[dict]]:
        """Match the evidence with the context"""
        spans: dict[str, list[dict]] = defaultdict(list)

        if not answer.metadata["citation"]:
            return spans

        evidences = answer.metadata["citation"].evidences

        for start_idx in range(0, len(evidences), 2):
            start_phrase, end_phrase = evidences[start_idx : start_idx + 2]
            best_match = None
            best_match_length = 0
            best_match_doc_idx = None

            for doc in docs:
                match, match_length = find_start_end_phrase(
                    start_phrase, end_phrase, doc.text
                )
                if best_match is None or (
                    match is not None and match_length > best_match_length
                ):
                    best_match = match
                    best_match_length = match_length
                    best_match_doc_idx = doc.doc_id

            if best_match is not None and best_match_doc_idx is not None:
                spans[best_match_doc_idx].append(
                    {
                        "start": best_match[0],
                        "end": best_match[1],
                        "idx": start_idx // 2,  # implicitly set from the start_idx
                    }
                )
        return spans
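To illustrate the parsing contract above (the raw string below is an illustrative model output, not taken from the diff), answer_to_citations collects the START_PHRASE/END_PHRASE pairs into a CiteEvidence, and replace_citation_with_link rewrites each 【n】 marker as an anchor tag. Assuming ktem defaults are configured so the pipeline can be constructed:

qa = AnswerWithInlineCitation()
raw = (
    "CITATION LIST\n"
    "CITATION【1】\n"
    "START_PHRASE: Fixed-size chunking splits documents\n"
    "END_PHRASE: may split related content.\n"
    "FINAL ANSWER\n"
    "Documents are split into equal chunks【1】."
)
evidences = qa.answer_to_citations(raw)  # CiteEvidence holding the start/end phrase pair
linked = qa.replace_citation_with_link("equal chunks【1】.")
# linked == "equal chunks<a href='#' class='citation' id='mark-1'>【1】</a>."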
libs/kotaemon/kotaemon/indices/qa/format_context.py (new file, 114 lines)
@ -0,0 +1,114 @@
import html
from functools import partial

import tiktoken

from kotaemon.base import BaseComponent, Document, RetrievedDocument
from kotaemon.indices.splitters import TokenSplitter

EVIDENCE_MODE_TEXT = 0
EVIDENCE_MODE_TABLE = 1
EVIDENCE_MODE_CHATBOT = 2
EVIDENCE_MODE_FIGURE = 3


class PrepareEvidencePipeline(BaseComponent):
    """Prepare the evidence text from the list of retrieved documents

    This step usually happens after `DocumentRetrievalPipeline`.

    Args:
        trim_func: a callback function or a BaseComponent, that splits a large
            chunk of text into smaller ones. The first one will be retained.
    """

    max_context_length: int = 32000
    trim_func: TokenSplitter | None = None

    def run(self, docs: list[RetrievedDocument]) -> Document:
        evidence = ""
        images = []
        table_found = 0
        evidence_modes = []

        evidence_trim_func = (
            self.trim_func
            if self.trim_func
            else TokenSplitter(
                chunk_size=self.max_context_length,
                chunk_overlap=0,
                separator=" ",
                tokenizer=partial(
                    tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
                    allowed_special=set(),
                    disallowed_special="all",
                ),
            )
        )

        for _, retrieved_item in enumerate(docs):
            retrieved_content = ""
            page = retrieved_item.metadata.get("page_label", None)
            source = filename = retrieved_item.metadata.get("file_name", "-")
            if page:
                source += f" (Page {page})"
            if retrieved_item.metadata.get("type", "") == "table":
                evidence_modes.append(EVIDENCE_MODE_TABLE)
                if table_found < 5:
                    retrieved_content = retrieved_item.metadata.get(
                        "table_origin", retrieved_item.text
                    )
                    if retrieved_content not in evidence:
                        table_found += 1
                        evidence += (
                            f"<br><b>Table from {source}</b>\n"
                            + retrieved_content
                            + "\n<br>"
                        )
            elif retrieved_item.metadata.get("type", "") == "chatbot":
                evidence_modes.append(EVIDENCE_MODE_CHATBOT)
                retrieved_content = retrieved_item.metadata["window"]
                evidence += (
                    f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n"
                    + retrieved_content
                    + "\n<br>"
                )
            elif retrieved_item.metadata.get("type", "") == "image":
                evidence_modes.append(EVIDENCE_MODE_FIGURE)
                retrieved_content = retrieved_item.metadata.get("image_origin", "")
                retrieved_caption = html.escape(retrieved_item.get_content())
                evidence += (
                    f"<br><b>Figure from {source}</b>\n"
                    + "<img width='85%' src='<src>' "
                    + f"alt='{retrieved_caption}'/>"
                    + "\n<br>"
                )
                images.append(retrieved_content)
            else:
                if "window" in retrieved_item.metadata:
                    retrieved_content = retrieved_item.metadata["window"]
                else:
                    retrieved_content = retrieved_item.text
                retrieved_content = retrieved_content.replace("\n", " ")
                if retrieved_content not in evidence:
                    evidence += (
                        f"<br><b>Content from {source}: </b> "
                        + retrieved_content
                        + " \n<br>"
                    )

        # resolve evidence mode
        evidence_mode = EVIDENCE_MODE_TEXT
        if EVIDENCE_MODE_FIGURE in evidence_modes:
            evidence_mode = EVIDENCE_MODE_FIGURE
        elif EVIDENCE_MODE_TABLE in evidence_modes:
            evidence_mode = EVIDENCE_MODE_TABLE

        # trim context by trim_len
        print("len (original)", len(evidence))
        if evidence:
            texts = evidence_trim_func([Document(text=evidence)])
            evidence = texts[0].text
        print("len (trimmed)", len(evidence))

        return Document(content=(evidence_mode, evidence, images))
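A short sketch (the retrieved_docs and qa variables are illustrative, following the earlier examples) of how the tuple returned above is consumed by the answering step:

prepare = PrepareEvidencePipeline(max_context_length=16000)
evidence_mode, evidence, images = prepare.run(retrieved_docs).content
# evidence_mode selects the prompt template; images feed the multimodal message
answer_gen = qa.stream(question, evidence, evidence_mode=evidence_mode, images=images)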
@ -1,63 +0,0 @@
import os

from kotaemon.base import BaseComponent, Document, Node, RetrievedDocument
from kotaemon.llms import BaseLLM, LCAzureChatOpenAI, PromptTemplate

from .citation import CitationPipeline


class CitationQAPipeline(BaseComponent):
    """Answering question from a text corpus with citation"""

    qa_prompt_template: PromptTemplate = PromptTemplate(
        'Answer the following question: "{question}". '
        "The context is: \n{context}\nAnswer: "
    )
    llm: BaseLLM = LCAzureChatOpenAI.withx(
        azure_endpoint="https://bleh-dummy.openai.azure.com/",
        openai_api_key=os.environ.get("OPENAI_API_KEY", ""),
        openai_api_version="2023-07-01-preview",
        deployment_name="dummy-q2-16k",
        temperature=0,
        request_timeout=60,
    )
    citation_pipeline: CitationPipeline = Node(
        default_callback=lambda self: CitationPipeline(llm=self.llm)
    )

    def _format_doc_text(self, text: str) -> str:
        """Format the text of each document"""
        return text.replace("\n", " ")

    def _format_retrieved_context(self, documents: list[RetrievedDocument]) -> str:
        """Format the texts between all documents"""
        matched_texts: list[str] = [
            self._format_doc_text(doc.text) for doc in documents
        ]
        return "\n\n".join(matched_texts)

    def run(
        self,
        question: str,
        documents: list[RetrievedDocument],
        use_citation: bool = False,
        **kwargs
    ) -> Document:
        # retrieve relevant documents as context
        context = self._format_retrieved_context(documents)
        self.log_progress(".context", context=context)

        # generate the answer
        prompt = self.qa_prompt_template.populate(
            context=context,
            question=question,
        )
        self.log_progress(".prompt", prompt=prompt)
        answer_text = self.llm(prompt).text
        if use_citation:
            citation = self.citation_pipeline(context=context, question=question)
        else:
            citation = None

        answer = Document(text=answer_text, metadata={"citation": citation})
        return answer
libs/kotaemon/kotaemon/indices/qa/utils.py (new file, 53 lines)
@ -0,0 +1,53 @@
from difflib import SequenceMatcher


def find_text(search_span, context, min_length=5):
    sentence_list = search_span.split("\n")
    context = context.replace("\n", " ")

    matches = []
    # don't search for small text
    if len(search_span) > min_length:
        for sentence in sentence_list:
            match = SequenceMatcher(
                None, sentence, context, autojunk=False
            ).find_longest_match()
            if match.size > max(len(sentence) * 0.35, min_length):
                matches.append((match.b, match.b + match.size))

    return matches


def find_start_end_phrase(
    start_phrase, end_phrase, context, min_length=5, max_excerpt_length=300
):
    context = context.replace("\n", " ")

    matches = []
    matched_length = 0
    for sentence in [start_phrase, end_phrase]:
        match = SequenceMatcher(
            None, sentence, context, autojunk=False
        ).find_longest_match()
        if match.size > max(len(sentence) * 0.35, min_length):
            matches.append((match.b, match.b + match.size))
            matched_length += match.size

    # check if second match is before the first match
    if len(matches) == 2 and matches[1][0] < matches[0][0]:
        # if so, keep only the first match
        matches = [matches[0]]

    if matches:
        start_idx = min(start for start, _ in matches)
        end_idx = max(end for _, end in matches)

        # check if the excerpt is too long
        if end_idx - start_idx > max_excerpt_length:
            end_idx = start_idx + max_excerpt_length

        final_match = (start_idx, end_idx)
    else:
        final_match = None

    return final_match, matched_length
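For illustration (the context string is made up), the phrase matcher returns character offsets into the context plus the total matched length, or None when neither phrase is found:

from kotaemon.indices.qa.utils import find_start_end_phrase

ctx = "Fixed-size chunking splits documents into equal chunks. It is fast but may split related content."
span, matched_len = find_start_end_phrase(
    "Fixed-size chunking splits documents",
    "may split related content.",
    ctx,
)
# span is a (start, end) pair of character indices into ctx; ctx[span[0]:span[1]] is the cited excerpt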
@ -42,14 +42,6 @@ class VectorIndexing(BaseIndexing):
            **kwargs,
        )

    def to_qa_pipeline(self, *args, **kwargs):
        from .qa import CitationQAPipeline

        return TextVectorQA(
            retrieving_pipeline=self.to_retrieval_pipeline(**kwargs),
            qa_pipeline=CitationQAPipeline(**kwargs),
        )

    def write_chunk_to_file(self, docs: list[Document]):
        # save the chunks content into markdown format
        if self.cache_dir:
@ -72,7 +72,7 @@ class PromptTemplate:
                UserWarning,
            )

    def populate(self, **kwargs) -> str:
    def populate(self, safe=True, **kwargs) -> str:
        """
        Strictly populate the template with the given keyword arguments.
@ -86,6 +86,7 @@ class PromptTemplate:
        Raises:
            ValueError: If an unknown placeholder is provided.
        """
        if safe:
            self.check_missing_kwargs(**kwargs)

        return self.partial_populate(**kwargs)
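A brief illustration of the new safe flag (the template string here is illustrative): populate() still delegates to partial_populate(), but with safe=False it skips the check_missing_kwargs validation, which is how the inline-citation get_prompt above calls it:

from kotaemon.llms import PromptTemplate

tpl = PromptTemplate("{context}\n\nQuestion: {question}\nAnswer in {lang}:")
prompt = tpl.populate(context="...", question="...", safe=False)
# with safe=True (the default) this call would first be validated by check_missing_kwargs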
@ -97,6 +97,11 @@ button.selected {
#chat-info-panel {
  max-height: var(--main-area-height) !important;
  overflow: auto !important;
  transition: all 0.5s;
}

body.dark #chat-info-panel figure>img {
  filter: invert(100%);
}

#conv-settings-panel {
@ -199,11 +204,26 @@ mark {
  right: 15px;
}

/* #new-conv-button > img {
  position: relative;
  top: 0px;
  right: -50%;
} */
#use-mindmap-checkbox {
  position: absolute;
  width: 110px;
  top: 10px;
  right: 25px;
}

#quick-url textarea {
  resize: none;
  background: transparent;
  margin-top: 0px;
}

#quick-url textarea::placeholder {
  text-align: center;
}

#quick-file {
  height: 110px;
}

span.icon {
  color: #cecece;
@ -225,11 +245,6 @@ span.icon {
  overflow: unset !important;
}

/*body {*/
/*  margin: 0;*/
/*  font-family: Arial, sans-serif;*/
/*}*/

pdfjs-viewer-element {
  height: 100vh;
  height: 100dvh;
@ -280,8 +295,7 @@ pdfjs-viewer-element {
  overflow: auto;
}

/** Switch
-------------------------------------*/
/* Switch checkbox styles */

#is-public-checkbox {
  position: relative;
@ -293,10 +307,6 @@ pdfjs-viewer-element {
  opacity: 0;
}

/**
 * 1. Adjust this to size
 */

.switch {
  display: inline-block;
  /* 1 */
@ -330,3 +340,28 @@ pdfjs-viewer-element {
.switch:has(> input:checked) {
  background: #0c895f;
}

/* Bot animation */

.message.bot {
  animation: fadein 1.5s ease-in-out forwards;
}

details.evidence {
  animation: fadein 0.5s ease-in-out forwards;
}

@keyframes fadein {
  0% {
    opacity: 0;
  }

  100% {
    opacity: 100%;
  }
}

.message a.citation {
  color: #10b981;
  text-decoration: none;
}
@ -16,6 +16,11 @@ function run() {
    let chat_info_panel = document.getElementById("info-expand");
    chat_info_panel.insertBefore(info_expand_button, chat_info_panel.childNodes[2]);

    // move use mind-map checkbox
    let mindmap_checkbox = document.getElementById("use-mindmap-checkbox");
    let chat_setting_panel = document.getElementById("chat-settings-expand");
    chat_setting_panel.insertBefore(mindmap_checkbox, chat_setting_panel.childNodes[2]);

    // create slider toggle
    const is_public_checkbox = document.getElementById("is-public-checkbox");
    const label_element = is_public_checkbox.getElementsByTagName("label")[0];
@ -49,4 +54,21 @@ function run() {
    globalThis.removeFromStorage = (key) => {
        localStorage.removeItem(key)
    }

    // Function to scroll to a given citation ID
    // Sleep function using Promise and setTimeout
    function sleep(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }

    globalThis.scrollToCitation = async (event) => {
        event.preventDefault(); // Prevent the default link behavior
        var citationId = event.target.getAttribute('id');

        await sleep(100); // wait 100 ms so the highlighted mark has been rendered
        var citation = document.querySelector('mark[id="' + citationId + '"]');
        if (citation) {
            citation.scrollIntoView({ behavior: 'smooth' });
        }
    }
}
@ -25,8 +25,8 @@ class BaseConversation(SQLModel):
        default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True
    )
    name: str = Field(
        default_factory=lambda: datetime.datetime.now(get_localzone()).strftime(
            "%Y-%m-%d %H:%M:%S"
        default_factory=lambda: "Untitled - {}".format(
            datetime.datetime.now(get_localzone()).strftime("%Y-%m-%d %H:%M:%S")
        )
    )
    user: int = Field(default=0)  # For now we only have one user
@ -126,6 +126,9 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
        if doc_ids:
            flatten_doc_ids = []
            for doc_id in doc_ids:
                if doc_id is None:
                    raise ValueError("No document is selected")

                if doc_id.startswith("["):
                    flatten_doc_ids.extend(json.loads(doc_id))
                else:
@ -22,6 +22,13 @@ from theflow.settings import settings as flowsettings
|
|||
DOWNLOAD_MESSAGE = "Press again to download"
|
||||
MAX_FILENAME_LENGTH = 20
|
||||
|
||||
chat_input_focus_js = """
|
||||
function() {
|
||||
let chatInput = document.querySelector("#chat-input textarea");
|
||||
chatInput.focus();
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class File(gr.File):
|
||||
"""Subclass from gr.File to maintain the original filename
|
||||
|
@ -666,7 +673,7 @@ class FileIndexPage(BasePage):
|
|||
outputs=self._app.chat_page.quick_file_upload_status,
|
||||
)
|
||||
.then(
|
||||
fn=self.index_fn_with_default_loaders,
|
||||
fn=self.index_fn_file_with_default_loaders,
|
||||
inputs=[
|
||||
self._app.chat_page.quick_file_upload,
|
||||
gr.State(value=False),
|
||||
|
@ -689,6 +696,38 @@ class FileIndexPage(BasePage):
|
|||
for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
|
||||
quickUploadedEvent = quickUploadedEvent.then(**event)
|
||||
|
||||
quickURLUploadedEvent = (
|
||||
self._app.chat_page.quick_urls.submit(
|
||||
fn=lambda: gr.update(
|
||||
value="Please wait for the indexing process "
|
||||
"to complete before adding your question."
|
||||
),
|
||||
outputs=self._app.chat_page.quick_file_upload_status,
|
||||
)
|
||||
.then(
|
||||
fn=self.index_fn_url_with_default_loaders,
|
||||
inputs=[
|
||||
self._app.chat_page.quick_urls,
|
||||
gr.State(value=True),
|
||||
self._app.settings_state,
|
||||
self._app.user_id,
|
||||
],
|
||||
outputs=self.quick_upload_state,
|
||||
)
|
||||
.success(
|
||||
fn=lambda: [
|
||||
gr.update(value=None),
|
||||
gr.update(value="select"),
|
||||
],
|
||||
outputs=[
|
||||
self._app.chat_page.quick_urls,
|
||||
self._app.chat_page._indices_input[0],
|
||||
],
|
||||
)
|
||||
)
|
||||
for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
|
||||
quickURLUploadedEvent = quickURLUploadedEvent.then(**event)
|
||||
|
||||
quickUploadedEvent.success(
|
||||
fn=lambda x: x,
|
||||
inputs=self.quick_upload_state,
|
||||
|
@ -701,6 +740,30 @@ class FileIndexPage(BasePage):
|
|||
inputs=[self._app.user_id, self.filter],
|
||||
outputs=[self.file_list_state, self.file_list],
|
||||
concurrency_limit=20,
|
||||
).then(
|
||||
fn=lambda: True,
|
||||
inputs=None,
|
||||
outputs=None,
|
||||
js=chat_input_focus_js,
|
||||
)
|
||||
|
||||
quickURLUploadedEvent.success(
|
||||
fn=lambda x: x,
|
||||
inputs=self.quick_upload_state,
|
||||
outputs=self._app.chat_page._indices_input[1],
|
||||
).then(
|
||||
fn=lambda: gr.update(value="Indexing completed."),
|
||||
outputs=self._app.chat_page.quick_file_upload_status,
|
||||
).then(
|
||||
fn=self.list_file,
|
||||
inputs=[self._app.user_id, self.filter],
|
||||
outputs=[self.file_list_state, self.file_list],
|
||||
concurrency_limit=20,
|
||||
).then(
|
||||
fn=lambda: True,
|
||||
inputs=None,
|
||||
outputs=None,
|
||||
js=chat_input_focus_js,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
@ -951,7 +1014,7 @@ class FileIndexPage(BasePage):
|
|||
|
||||
return results
|
||||
|
||||
def index_fn_with_default_loaders(
|
||||
def index_fn_file_with_default_loaders(
|
||||
self, files, reindex: bool, settings, user_id
|
||||
) -> list["str"]:
|
||||
"""Function for quick upload with default loaders
|
||||
|
@ -991,6 +1054,22 @@ class FileIndexPage(BasePage):
|
|||
|
||||
return exist_ids + returned_ids
|
||||
|
||||
def index_fn_url_with_default_loaders(self, urls, reindex: bool, settings, user_id):
|
||||
returned_ids = []
|
||||
settings = deepcopy(settings)
|
||||
settings[f"index.options.{self._index.id}.reader_mode"] = "default"
|
||||
settings[f"index.options.{self._index.id}.quick_index_mode"] = True
|
||||
|
||||
if urls:
|
||||
_iter = self.index_fn([], urls, reindex, settings, user_id)
|
||||
try:
|
||||
while next(_iter):
|
||||
pass
|
||||
except StopIteration as e:
|
||||
returned_ids = e.value
|
||||
|
||||
return returned_ids
|
||||
|
||||
def index_files_from_dir(
|
||||
self, folder_path, reindex, settings, user_id
|
||||
) -> Generator[tuple[str, str], None, None]:
|
||||
|
|
|
@ -40,9 +40,29 @@ function() {
|
|||
links[i].onclick = openModal;
|
||||
}
|
||||
|
||||
// Get all citation links and attach click event
|
||||
var links = document.querySelectorAll("a.citation");
|
||||
for (var i = 0; i < links.length; i++) {
|
||||
links[i].onclick = scrollToCitation;
|
||||
}
|
||||
|
||||
var mindmap_el = document.getElementById('mindmap');
|
||||
|
||||
if (mindmap_el) {
|
||||
var output = svgPanZoom(mindmap_el);
|
||||
const svg = mindmap_el.cloneNode(true);
|
||||
|
||||
function on_svg_export(event) {
|
||||
event.preventDefault(); // Prevent the default link behavior
|
||||
// convert to a valid XML source
|
||||
const as_text = new XMLSerializer().serializeToString(svg);
|
||||
// store in a Blob
|
||||
const blob = new Blob([as_text], { type: "image/svg+xml" });
|
||||
// create an URI pointing to that blob
|
||||
const url = URL.createObjectURL(blob);
|
||||
const win = open(url);
|
||||
// so the Garbage Collector can collect the blob
|
||||
win.onload = (evt) => URL.revokeObjectURL(url);
|
||||
}
|
||||
|
||||
var link = document.getElementById("mindmap-toggle");
|
||||
|
@ -62,6 +82,12 @@ function() {
|
|||
};
|
||||
}
|
||||
|
||||
var link = document.getElementById("mindmap-export");
|
||||
if (link) {
|
||||
link.addEventListener('click', on_svg_export);
|
||||
}
|
||||
}
|
||||
|
||||
return [links.length]
|
||||
}
|
||||
"""
|
||||
|
@ -127,6 +153,14 @@ class ChatPage(BasePage):
|
|||
file_count="multiple",
|
||||
container=True,
|
||||
show_label=False,
|
||||
elem_id="quick-file",
|
||||
)
|
||||
self.quick_urls = gr.Textbox(
|
||||
placeholder="Or paste URLs here",
|
||||
lines=1,
|
||||
container=False,
|
||||
show_label=False,
|
||||
elem_id="quick-url",
|
||||
)
|
||||
self.quick_file_upload_status = gr.Markdown()
|
||||
|
||||
|
@ -136,12 +170,17 @@ class ChatPage(BasePage):
|
|||
self.chat_panel = ChatPanel(self._app)
|
||||
|
||||
with gr.Row():
|
||||
with gr.Accordion(label="Chat settings", open=False):
|
||||
with gr.Accordion(
|
||||
label="Chat settings",
|
||||
elem_id="chat-settings-expand",
|
||||
open=False,
|
||||
):
|
||||
# a quick switch for reasoning type option
|
||||
with gr.Row():
|
||||
gr.HTML("Reasoning method")
|
||||
gr.HTML("Model")
|
||||
gr.HTML("Generate mindmap")
|
||||
gr.HTML("Language")
|
||||
gr.HTML("Citation")
|
||||
|
||||
with gr.Row():
|
||||
reasoning_type_values = [
|
||||
|
@ -165,17 +204,36 @@ class ChatPage(BasePage):
|
|||
container=False,
|
||||
show_label=False,
|
||||
)
|
||||
binary_default_choices = [
|
||||
self.language = gr.Dropdown(
|
||||
choices=[
|
||||
(DEFAULT_SETTING, DEFAULT_SETTING),
|
||||
("Enable", True),
|
||||
("Disable", False),
|
||||
]
|
||||
self.use_mindmap = gr.Dropdown(
|
||||
+ self._app.default_settings.reasoning.settings[
|
||||
"lang"
|
||||
].choices,
|
||||
value=DEFAULT_SETTING,
|
||||
choices=binary_default_choices,
|
||||
container=False,
|
||||
show_label=False,
|
||||
)
|
||||
self.citation = gr.Dropdown(
|
||||
choices=[
|
||||
(DEFAULT_SETTING, DEFAULT_SETTING),
|
||||
]
|
||||
+ self._app.default_settings.reasoning.options["simple"]
|
||||
.settings["highlight_citation"]
|
||||
.choices,
|
||||
value=DEFAULT_SETTING,
|
||||
container=False,
|
||||
show_label=False,
|
||||
interactive=True,
|
||||
)
|
||||
|
||||
self.use_mindmap = gr.State(value=DEFAULT_SETTING)
|
||||
self.use_mindmap_check = gr.Checkbox(
|
||||
label="Mindmap (default)",
|
||||
container=False,
|
||||
elem_id="use-mindmap-checkbox",
|
||||
)
|
||||
|
||||
with gr.Column(
|
||||
scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel"
|
||||
|
@ -235,6 +293,8 @@ class ChatPage(BasePage):
|
|||
self._reasoning_type,
|
||||
self.model_type,
|
||||
self.use_mindmap,
|
||||
self.citation,
|
||||
self.language,
|
||||
self.state_chat,
|
||||
self._app.user_id,
|
||||
]
|
||||
|
@ -506,6 +566,12 @@ class ChatPage(BasePage):
|
|||
inputs=[self.reasoning_type],
|
||||
outputs=[self._reasoning_type],
|
||||
)
|
||||
self.use_mindmap_check.change(
|
||||
lambda x: (x, gr.update(label="Mindmap " + ("(on)" if x else "(off)"))),
|
||||
inputs=[self.use_mindmap_check],
|
||||
outputs=[self.use_mindmap, self.use_mindmap_check],
|
||||
show_progress="hidden",
|
||||
)
|
||||
self.chat_control.conversation_id.change(
|
||||
lambda: gr.update(visible=False),
|
||||
outputs=self.plot_panel,
|
||||
|
@ -722,6 +788,8 @@ class ChatPage(BasePage):
|
|||
session_reasoning_type: str,
|
||||
session_llm: str,
|
||||
session_use_mindmap: bool | str,
|
||||
session_use_citation: str,
|
||||
session_language: str,
|
||||
state: dict,
|
||||
user_id: int,
|
||||
*selecteds,
|
||||
|
@ -743,6 +811,10 @@ class ChatPage(BasePage):
|
|||
session_reasoning_type,
|
||||
"use mindmap",
|
||||
session_use_mindmap,
|
||||
"use citation",
|
||||
session_use_citation,
|
||||
"language",
|
||||
session_language,
|
||||
)
|
||||
print("Session LLM", session_llm)
|
||||
reasoning_mode = (
|
||||
|
@ -766,6 +838,14 @@ class ChatPage(BasePage):
|
|||
if session_use_mindmap not in (DEFAULT_SETTING, None):
|
||||
settings["reasoning.options.simple.create_mindmap"] = session_use_mindmap
|
||||
|
||||
if session_use_citation not in (DEFAULT_SETTING, None):
|
||||
settings[
|
||||
"reasoning.options.simple.highlight_citation"
|
||||
] = session_use_citation
|
||||
|
||||
if session_language not in (DEFAULT_SETTING, None):
|
||||
settings["reasoning.lang"] = session_language
|
||||
|
||||
# get retrievers
|
||||
retrievers = []
|
||||
for index in self._app.index_manager.indices:
|
||||
|
@ -798,6 +878,8 @@ class ChatPage(BasePage):
|
|||
reasoning_type,
|
||||
llm_type,
|
||||
use_mind_map,
|
||||
use_citation,
|
||||
language,
|
||||
state,
|
||||
user_id,
|
||||
*selecteds,
|
||||
|
@ -814,7 +896,15 @@ class ChatPage(BasePage):
|
|||
|
||||
# construct the pipeline
|
||||
pipeline, reasoning_state = self.create_pipeline(
|
||||
settings, reasoning_type, llm_type, use_mind_map, state, user_id, *selecteds
|
||||
settings,
|
||||
reasoning_type,
|
||||
llm_type,
|
||||
use_mind_map,
|
||||
use_citation,
|
||||
language,
|
||||
state,
|
||||
user_id,
|
||||
*selecteds,
|
||||
)
|
||||
print("Reasoning state", reasoning_state)
|
||||
pipeline.set_output_queue(queue)
|
||||
|
|
|
@ -28,6 +28,7 @@ class ChatPanel(BasePage):
|
|||
placeholder="Chat input",
|
||||
container=False,
|
||||
show_label=False,
|
||||
elem_id="chat-input",
|
||||
)
|
||||
|
||||
def submit_msg(self, chat_input, chat_history):
|
||||
|
|
|
@ -1,17 +1,10 @@
|
|||
import html
|
||||
import logging
|
||||
import threading
|
||||
from collections import defaultdict
|
||||
from difflib import SequenceMatcher
|
||||
from functools import partial
|
||||
from typing import Generator
|
||||
|
||||
import numpy as np
|
||||
import tiktoken
|
||||
from ktem.embeddings.manager import embedding_models_manager as embeddings
|
||||
from ktem.llms.manager import llms
|
||||
from ktem.reasoning.prompt_optimization import (
|
||||
CreateMindmapPipeline,
|
||||
DecomposeQuestionPipeline,
|
||||
RewriteQuestionPipeline,
|
||||
)
|
||||
|
@ -19,7 +12,6 @@ from ktem.utils.plantuml import PlantUML
|
|||
from ktem.utils.render import Render
|
||||
from ktem.utils.visualize_cited import CreateCitationVizPipeline
|
||||
from plotly.io import to_json
|
||||
from theflow.settings import settings as flowsettings
|
||||
|
||||
from kotaemon.base import (
|
||||
AIMessage,
|
||||
|
@ -30,399 +22,20 @@ from kotaemon.base import (
|
|||
RetrievedDocument,
|
||||
SystemMessage,
|
||||
)
|
||||
from kotaemon.indices.qa.citation import CitationPipeline
|
||||
from kotaemon.indices.splitters import TokenSplitter
|
||||
from kotaemon.llms import ChatLLM, PromptTemplate
|
||||
from kotaemon.indices.qa.citation_qa import (
|
||||
CONTEXT_RELEVANT_WARNING_SCORE,
|
||||
DEFAULT_QA_TEXT_PROMPT,
|
||||
AnswerWithContextPipeline,
|
||||
)
|
||||
from kotaemon.indices.qa.citation_qa_inline import AnswerWithInlineCitation
|
||||
from kotaemon.indices.qa.format_context import PrepareEvidencePipeline
|
||||
from kotaemon.llms import ChatLLM
|
||||
|
||||
from ..utils import SUPPORTED_LANGUAGE_MAP
|
||||
from .base import BaseReasoning
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
EVIDENCE_MODE_TEXT = 0
|
||||
EVIDENCE_MODE_TABLE = 1
|
||||
EVIDENCE_MODE_CHATBOT = 2
|
||||
EVIDENCE_MODE_FIGURE = 3
|
||||
MAX_IMAGES = 10
|
||||
CITATION_TIMEOUT = 5.0
|
||||
|
||||
|
||||
def find_text(search_span, context):
|
||||
sentence_list = search_span.split("\n")
|
||||
context = context.replace("\n", " ")
|
||||
|
||||
matches = []
|
||||
# don't search for small text
|
||||
if len(search_span) > 5:
|
||||
for sentence in sentence_list:
|
||||
match = SequenceMatcher(
|
||||
None, sentence, context, autojunk=False
|
||||
).find_longest_match()
|
||||
if match.size > max(len(sentence) * 0.35, 5):
|
||||
matches.append((match.b, match.b + match.size))
|
||||
|
||||
return matches
|
||||
|
||||
|
||||
class PrepareEvidencePipeline(BaseComponent):
|
||||
"""Prepare the evidence text from the list of retrieved documents
|
||||
|
||||
This step usually happens after `DocumentRetrievalPipeline`.
|
||||
|
||||
Args:
|
||||
trim_func: a callback function or a BaseComponent, that splits a large
|
||||
chunk of text into smaller ones. The first one will be retained.
|
||||
"""
|
||||
|
||||
max_context_length: int = 32000
|
||||
trim_func: TokenSplitter | None = None
|
||||
|
||||
def run(self, docs: list[RetrievedDocument]) -> Document:
|
||||
evidence = ""
|
||||
images = []
|
||||
table_found = 0
|
||||
evidence_modes = []
|
||||
|
||||
evidence_trim_func = (
|
||||
self.trim_func
|
||||
if self.trim_func
|
||||
else TokenSplitter(
|
||||
chunk_size=self.max_context_length,
|
||||
chunk_overlap=0,
|
||||
separator=" ",
|
||||
tokenizer=partial(
|
||||
tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
|
||||
allowed_special=set(),
|
||||
disallowed_special="all",
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
for _id, retrieved_item in enumerate(docs):
|
||||
retrieved_content = ""
|
||||
page = retrieved_item.metadata.get("page_label", None)
|
||||
source = filename = retrieved_item.metadata.get("file_name", "-")
|
||||
if page:
|
||||
source += f" (Page {page})"
|
||||
if retrieved_item.metadata.get("type", "") == "table":
|
||||
evidence_modes.append(EVIDENCE_MODE_TABLE)
|
||||
if table_found < 5:
|
||||
retrieved_content = retrieved_item.metadata.get(
|
||||
"table_origin", retrieved_item.text
|
||||
)
|
||||
if retrieved_content not in evidence:
|
||||
table_found += 1
|
||||
evidence += (
|
||||
f"<br><b>Table from {source}</b>\n"
|
||||
+ retrieved_content
|
||||
+ "\n<br>"
|
||||
)
|
||||
elif retrieved_item.metadata.get("type", "") == "chatbot":
|
||||
evidence_modes.append(EVIDENCE_MODE_CHATBOT)
|
||||
retrieved_content = retrieved_item.metadata["window"]
|
||||
evidence += (
|
||||
f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n"
|
||||
+ retrieved_content
|
||||
+ "\n<br>"
|
||||
)
|
||||
elif retrieved_item.metadata.get("type", "") == "image":
|
||||
evidence_modes.append(EVIDENCE_MODE_FIGURE)
|
||||
retrieved_content = retrieved_item.metadata.get("image_origin", "")
|
||||
retrieved_caption = html.escape(retrieved_item.get_content())
|
||||
evidence += (
|
||||
f"<br><b>Figure from {source}</b>\n"
|
||||
+ "<img width='85%' src='<src>' "
|
||||
+ f"alt='{retrieved_caption}'/>"
|
||||
+ "\n<br>"
|
||||
)
|
||||
images.append(retrieved_content)
|
||||
else:
|
||||
if "window" in retrieved_item.metadata:
|
||||
retrieved_content = retrieved_item.metadata["window"]
|
||||
else:
|
||||
retrieved_content = retrieved_item.text
|
||||
retrieved_content = retrieved_content.replace("\n", " ")
|
||||
if retrieved_content not in evidence:
|
||||
evidence += (
|
||||
f"<br><b>Content from {source}: </b> "
|
||||
+ retrieved_content
|
||||
+ " \n<br>"
|
||||
)
|
||||
|
||||
# resolve evidence mode
|
||||
evidence_mode = EVIDENCE_MODE_TEXT
|
||||
if EVIDENCE_MODE_FIGURE in evidence_modes:
|
||||
evidence_mode = EVIDENCE_MODE_FIGURE
|
||||
elif EVIDENCE_MODE_TABLE in evidence_modes:
|
||||
evidence_mode = EVIDENCE_MODE_TABLE
|
||||
|
||||
# trim context by trim_len
|
||||
print("len (original)", len(evidence))
|
||||
if evidence:
|
||||
texts = evidence_trim_func([Document(text=evidence)])
|
||||
evidence = texts[0].text
|
||||
print("len (trimmed)", len(evidence))
|
||||
|
||||
return Document(content=(evidence_mode, evidence, images))
|
||||
|
||||
|
||||
DEFAULT_QA_TEXT_PROMPT = (
|
||||
"Use the following pieces of context to answer the question at the end in detail with clear explanation. " # noqa: E501
|
||||
"If you don't know the answer, just say that you don't know, don't try to "
|
||||
"make up an answer. Give answer in "
|
||||
"{lang}.\n\n"
|
||||
"{context}\n"
|
||||
"Question: {question}\n"
|
||||
"Helpful Answer:"
|
||||
)
|
||||
|
||||
DEFAULT_QA_TABLE_PROMPT = (
|
||||
"Use the given context: texts, tables, and figures below to answer the question, "
|
||||
"then provide answer with clear explanation."
|
||||
"If you don't know the answer, just say that you don't know, "
|
||||
"don't try to make up an answer. Give answer in {lang}.\n\n"
|
||||
"Context:\n"
|
||||
"{context}\n"
|
||||
"Question: {question}\n"
|
||||
"Helpful Answer:"
|
||||
) # noqa
|
||||
|
||||
DEFAULT_QA_CHATBOT_PROMPT = (
|
||||
"Pick the most suitable chatbot scenarios to answer the question at the end, "
|
||||
"output the provided answer text. If you don't know the answer, "
|
||||
"just say that you don't know. Keep the answer as concise as possible. "
|
||||
"Give answer in {lang}.\n\n"
|
||||
"Context:\n"
|
||||
"{context}\n"
|
||||
"Question: {question}\n"
|
||||
"Answer:"
|
||||
) # noqa
|
||||
|
||||
DEFAULT_QA_FIGURE_PROMPT = (
|
||||
"Use the given context: texts, tables, and figures below to answer the question. "
|
||||
"If you don't know the answer, just say that you don't know. "
|
||||
"Give answer in {lang}.\n\n"
|
||||
"Context: \n"
|
||||
"{context}\n"
|
||||
"Question: {question}\n"
|
||||
"Answer: "
|
||||
) # noqa
|
||||
|
||||
CONTEXT_RELEVANT_WARNING_SCORE = 0.7
|
||||
|
||||
|
||||
class AnswerWithContextPipeline(BaseComponent):
    """Answer the question based on the evidence

    Args:
        llm: the language model to generate the answer
        citation_pipeline: generates citation from the evidence
        qa_template: the prompt template for LLM to generate answer (refer to
            evidence_mode)
        qa_table_template: the prompt template for LLM to generate answer for table
            (refer to evidence_mode)
        qa_chatbot_template: the prompt template for LLM to generate answer for
            pre-made scenarios (refer to evidence_mode)
        lang: the language of the answer. Currently supports English and Japanese
    """

    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
    vlm_endpoint: str = getattr(flowsettings, "KH_VLM_ENDPOINT", "")
    use_multimodal: bool = getattr(flowsettings, "KH_REASONINGS_USE_MULTIMODAL", True)
    citation_pipeline: CitationPipeline = Node(
        default_callback=lambda _: CitationPipeline(llm=llms.get_default())
    )
    create_mindmap_pipeline: CreateMindmapPipeline = Node(
        default_callback=lambda _: CreateMindmapPipeline(llm=llms.get_default())
    )

    qa_template: str = DEFAULT_QA_TEXT_PROMPT
    qa_table_template: str = DEFAULT_QA_TABLE_PROMPT
    qa_chatbot_template: str = DEFAULT_QA_CHATBOT_PROMPT
    qa_figure_template: str = DEFAULT_QA_FIGURE_PROMPT

    enable_citation: bool = False
    enable_mindmap: bool = False
    enable_citation_viz: bool = False

    system_prompt: str = ""
    lang: str = "English"  # support English and Japanese
    n_last_interactions: int = 5

    def get_prompt(self, question, evidence, evidence_mode: int):
        """Prepare the prompt and other information for LLM"""
        if evidence_mode == EVIDENCE_MODE_TEXT:
            prompt_template = PromptTemplate(self.qa_template)
        elif evidence_mode == EVIDENCE_MODE_TABLE:
            prompt_template = PromptTemplate(self.qa_table_template)
        elif evidence_mode == EVIDENCE_MODE_FIGURE:
            if self.use_multimodal:
                prompt_template = PromptTemplate(self.qa_figure_template)
            else:
                prompt_template = PromptTemplate(self.qa_template)
        else:
            prompt_template = PromptTemplate(self.qa_chatbot_template)

        prompt = prompt_template.populate(
            context=evidence,
            question=question,
            lang=self.lang,
        )

        return prompt, evidence

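    # Illustrative only (not part of this commit): a rough sketch of how the
    # branching above resolves for table evidence. The pipeline instance and the
    # evidence string are hypothetical placeholders.
    #
    #   qa = AnswerWithContextPipeline(lang="English")
    #   prompt, _ = qa.get_prompt(
    #       question="What was the 2023 revenue?",
    #       evidence="<table>...</table>",
    #       evidence_mode=EVIDENCE_MODE_TABLE,
    #   )
    #   # `prompt` is DEFAULT_QA_TABLE_PROMPT with {context}, {question} and {lang}
    #   # substituted by PromptTemplate.populate
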
    def run(
        self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
    ) -> Document:
        return self.invoke(question, evidence, evidence_mode, **kwargs)

    def invoke(
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Document:
        raise NotImplementedError

    async def ainvoke(  # type: ignore
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Document:
        """Answer the question based on the evidence

        In addition to the question and the evidence, this method also takes into
        account evidence_mode. The evidence_mode tells which kind of evidence is
        provided. The kind of evidence affects:
            1. How the evidence is represented.
            2. The prompt to generate the answer.

        By default, the evidence_mode is 0, which means the evidence is plain text with
        no particular semantic representation. The evidence_mode can be:
            1. "table": There will be HTML markup telling that there is a table
                within the evidence.
            2. "chatbot": There will be HTML markup telling that there is a chatbot.
                This chatbot is a scenario, extracted from an Excel file, where each
                row corresponds to an interaction.

        Args:
            question: the original question posed by the user
            evidence: the text that contains relevant information to answer the question
                (determined by retrieval pipeline)
            evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot
        """
        raise NotImplementedError

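    # Illustrative only (not part of this commit): the integer evidence_mode values
    # described above correspond to the constants imported from .format_context,
    # mapped here from their usage in get_prompt (the exact integer values are an
    # assumption of this sketch):
    #
    #   EVIDENCE_MODE_TEXT    # plain text           -> qa_template
    #   EVIDENCE_MODE_TABLE   # HTML table markup    -> qa_table_template
    #   EVIDENCE_MODE_FIGURE  # figure images        -> qa_figure_template (multimodal)
    #                         #                         or qa_template (text-only fallback)
    #   # any other value (e.g. chatbot scenario)    -> qa_chatbot_template
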
    def stream(  # type: ignore
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Generator[Document, None, Document]:
        history = kwargs.get("history", [])
        print(f"Got {len(images)} images")
        # check if evidence exists, use QA prompt
        if evidence:
            prompt, evidence = self.get_prompt(question, evidence, evidence_mode)
        else:
            prompt = question

        # retrieve the citation
        citation = None
        mindmap = None

        def citation_call():
            nonlocal citation
            citation = self.citation_pipeline(context=evidence, question=question)

        def mindmap_call():
            nonlocal mindmap
            mindmap = self.create_mindmap_pipeline(context=evidence, question=question)

        citation_thread = None
        mindmap_thread = None

        # execute function call in thread
        if evidence:
            if self.enable_citation:
                citation_thread = threading.Thread(target=citation_call)
                citation_thread.start()

            if self.enable_mindmap:
                mindmap_thread = threading.Thread(target=mindmap_call)
                mindmap_thread.start()

        output = ""
        logprobs = []

        messages = []
        if self.system_prompt:
            messages.append(SystemMessage(content=self.system_prompt))
        for human, ai in history[-self.n_last_interactions :]:
            messages.append(HumanMessage(content=human))
            messages.append(AIMessage(content=ai))

        if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:
            # create image message:
            messages.append(
                HumanMessage(
                    content=[
                        {"type": "text", "text": prompt},
                    ]
                    + [
                        {
                            "type": "image_url",
                            "image_url": {"url": image},
                        }
                        for image in images[:MAX_IMAGES]
                    ],
                )
            )
        else:
            # append main prompt
            messages.append(HumanMessage(content=prompt))

        try:
            # try streaming first
            print("Trying LLM streaming")
            for out_msg in self.llm.stream(messages):
                output += out_msg.text
                logprobs += out_msg.logprobs
                yield Document(channel="chat", content=out_msg.text)
        except NotImplementedError:
            print("Streaming is not supported, falling back to normal processing")
            output = self.llm(messages).text
            yield Document(channel="chat", content=output)

        if logprobs:
            qa_score = np.exp(np.average(logprobs))
        else:
            qa_score = None

        if citation_thread:
            citation_thread.join(timeout=CITATION_TIMEOUT)
        if mindmap_thread:
            mindmap_thread.join(timeout=CITATION_TIMEOUT)

        answer = Document(
            text=output,
            metadata={
                "citation_viz": self.enable_citation_viz,
                "mindmap": mindmap,
                "citation": citation,
                "qa_score": qa_score,
            },
        )

        return answer

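# Illustrative only (not part of this commit): a rough sketch of how a caller can
# drive AnswerWithContextPipeline.stream(). The constructor arguments and the
# evidence string are hypothetical placeholders.
#
#   qa = AnswerWithContextPipeline(enable_citation=True, lang="English")
#   gen = qa.stream(
#       question="What does the report conclude?",
#       evidence="...retrieved context...",
#       evidence_mode=EVIDENCE_MODE_TEXT,
#   )
#   for doc in gen:
#       print(doc.content, end="")  # chat-channel chunks as they stream in
#   # the final Document (answer text plus citation/mindmap/qa_score metadata) is
#   # the generator's return value, available via StopIteration.value when driven manually
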
class AddQueryContextPipeline(BaseComponent):
@ -481,7 +94,7 @@ class FullQAPipeline(BaseReasoning):
    retrievers: list[BaseComponent]

    evidence_pipeline: PrepareEvidencePipeline = PrepareEvidencePipeline.withx()
    answering_pipeline: AnswerWithContextPipeline = AnswerWithContextPipeline.withx()
    answering_pipeline: AnswerWithContextPipeline
    rewrite_pipeline: RewriteQuestionPipeline | None = None
    create_citation_viz_pipeline: CreateCitationVizPipeline = Node(
        default_callback=lambda _: CreateCitationVizPipeline(

@ -548,104 +161,35 @@ class FullQAPipeline(BaseReasoning):

        return docs, info

    def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document]]:
        """Prepare the citations to show on the UI"""
        with_citation, without_citation = [], []
        spans = defaultdict(list)
        has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs)

        if answer.metadata["citation"]:
            evidences = answer.metadata["citation"].evidences
            for quote in evidences:
                matched_excerpts = []
                for doc in docs:
                    matches = find_text(quote, doc.text)

                    for start, end in matches:
                        if "|" not in doc.text[start:end]:
                            spans[doc.doc_id].append(
                                {
                                    "start": start,
                                    "end": end,
                                }
                            )
                            matched_excerpts.append(doc.text[start:end])

                # print("Matched citation:", quote, matched_excerpts),

        id2docs = {doc.doc_id: doc for doc in docs}
        not_detected = set(id2docs.keys()) - set(spans.keys())

        # render highlight spans
        for _id, ss in spans.items():
            if not ss:
                not_detected.add(_id)
                continue
            cur_doc = id2docs[_id]
            highlight_text = ""

            ss = sorted(ss, key=lambda x: x["start"])
            text = cur_doc.text[: ss[0]["start"]]
            for idx, span in enumerate(ss):
                to_highlight = cur_doc.text[span["start"] : span["end"]]
                if len(to_highlight) > len(highlight_text):
                    highlight_text = to_highlight
                text += Render.highlight(to_highlight)
                if idx < len(ss) - 1:
                    text += cur_doc.text[span["end"] : ss[idx + 1]["start"]]
            text += cur_doc.text[ss[-1]["end"] :]
            # add to display list
            with_citation.append(
                Document(
                    channel="info",
                    content=Render.collapsible_with_header_score(
                        cur_doc,
                        override_text=text,
                        highlight_text=highlight_text,
                        open_collapsible=True,
                    ),
                )
            )

        print("Got {} cited docs".format(len(with_citation)))

        sorted_not_detected_items_with_scores = [
            (id_, id2docs[id_].metadata.get("llm_trulens_score", 0.0))
            for id_ in not_detected
        ]
        sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True)

        for id_, _ in sorted_not_detected_items_with_scores:
            doc = id2docs[id_]
            doc_score = doc.metadata.get("llm_trulens_score", 0.0)
            is_open = not has_llm_score or (
                doc_score > CONTEXT_RELEVANT_WARNING_SCORE and len(with_citation) == 0
            )
            without_citation.append(
                Document(
                    channel="info",
                    content=Render.collapsible_with_header_score(
                        doc, open_collapsible=is_open
                    ),
                )
            )
        return with_citation, without_citation

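    # Illustrative only (not part of this commit): given a cited quote returned by
    # the citation pipeline, prepare_citations() locates it in the source document
    # and wraps the matched span with Render.highlight. Roughly, for one match:
    #
    #   quote = "revenue grew 12% year over year"
    #   matches = find_text(quote, doc.text)          # [(start, end), ...]
    #   start, end = matches[0]
    #   preview = (
    #       doc.text[:start]
    #       + Render.highlight(doc.text[start:end])
    #       + doc.text[end:]
    #   )
    #   # `preview` is what ends up inside the collapsible evidence panel
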
    def prepare_mindmap(self, answer) -> Document | None:
        mindmap = answer.metadata["mindmap"]
        if mindmap:
            mindmap_text = mindmap.text
            uml_renderer = PlantUML()

            try:
                mindmap_svg = uml_renderer.process(mindmap_text)
            except Exception as e:
                print("Failed to process mindmap:", e)
                mindmap_svg = "<svg></svg>"

            # post-process the mindmap SVG
            mindmap_svg = (
                mindmap_svg.replace("sans-serif", "Quicksand, sans-serif")
                .replace("#181818", "#cecece")
                .replace("background:#FFFFF", "background:none")
                .replace("stroke-width:1", "stroke-width:2")
            )

            mindmap_content = Document(
                channel="info",
                content=Render.collapsible(
                    header="""
                    <i>Mindmap</i>
                    <a href="#" id='mindmap-toggle'">
                    [Expand]
                    </a>""",
                    <a href="#" id='mindmap-toggle'>
                    [Expand]</a>
                    <a href="#" id='mindmap-export'>
                    [Export]</a>""",
                    content=mindmap_svg,
                    open=True,
                ),

@ -674,7 +218,9 @@ class FullQAPipeline(BaseReasoning):
    def show_citations_and_addons(self, answer, docs, question):
        # show the evidence
        with_citation, without_citation = self.prepare_citations(answer, docs)
        with_citation, without_citation = self.answering_pipeline.prepare_citations(
            answer, docs
        )
        mindmap_output = self.prepare_mindmap(answer)
        citation_plot_output = self.prepare_citation_viz(answer, question, docs)

@ -773,6 +319,13 @@ class FullQAPipeline(BaseReasoning):
        return answer

    @classmethod
    def prepare_pipeline_instance(cls, settings, retrievers):
        return cls(
            retrievers=retrievers,
            rewrite_pipeline=RewriteQuestionPipeline(),
        )

    @classmethod
    def get_pipeline(cls, settings, states, retrievers):
        """Get the reasoning pipeline

@ -783,10 +336,7 @@ class FullQAPipeline(BaseReasoning):
        """
        max_context_length_setting = settings.get("reasoning.max_context_length", 32000)

        pipeline = cls(
            retrievers=retrievers,
            rewrite_pipeline=RewriteQuestionPipeline(),
        )
        pipeline = cls.prepare_pipeline_instance(settings, retrievers)

        prefix = f"reasoning.options.{cls.get_info()['id']}"
        llm_name = settings.get(f"{prefix}.llm", None)

@ -797,13 +347,22 @@ class FullQAPipeline(BaseReasoning):
        evidence_pipeline.max_context_length = max_context_length_setting

        # answering pipeline configuration
        answer_pipeline = pipeline.answering_pipeline
        use_inline_citation = settings[f"{prefix}.highlight_citation"] == "inline"

        if use_inline_citation:
            answer_pipeline = pipeline.answering_pipeline = AnswerWithInlineCitation()
        else:
            answer_pipeline = pipeline.answering_pipeline = AnswerWithContextPipeline()

        answer_pipeline.llm = llm
        answer_pipeline.citation_pipeline.llm = llm
        answer_pipeline.n_last_interactions = settings[f"{prefix}.n_last_interactions"]
        answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"]
        answer_pipeline.enable_citation = (
            settings[f"{prefix}.highlight_citation"] != "off"
        )
        answer_pipeline.enable_mindmap = settings[f"{prefix}.create_mindmap"]
        answer_pipeline.enable_citation_viz = settings[f"{prefix}.create_citation_viz"]
        answer_pipeline.use_multimodal = settings[f"{prefix}.use_multimodal"]
        answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"]
        answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"]
        answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get(

@ -848,9 +407,10 @@ class FullQAPipeline(BaseReasoning):
                ),
            },
            "highlight_citation": {
                "name": "Highlight Citation",
                "value": True,
                "component": "checkbox",
                "name": "Citation style",
                "value": "highlight",
                "component": "radio",
                "choices": ["highlight", "inline", "off"],
            },
            "create_mindmap": {
                "name": "Create Mindmap",

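            # Illustrative only (not part of this commit): the radio value above is
            # read back through the "reasoning.options.<id>.highlight_citation" key
            # (the option id "simple" below is a placeholder), e.g.
            #
            #   settings["reasoning.options.simple.highlight_citation"] = "inline"
            #
            #   # "inline"    -> AnswerWithInlineCitation, citations embedded in the answer
            #   # "highlight" -> AnswerWithContextPipeline, evidence highlighted in the panel
            #   # "off"       -> AnswerWithContextPipeline with enable_citation = False
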
@ -862,6 +422,11 @@ class FullQAPipeline(BaseReasoning):
                "value": False,
                "component": "checkbox",
            },
            "use_multimodal": {
                "name": "Use Multimodal Input",
                "value": False,
                "component": "checkbox",
            },
            "system_prompt": {
                "name": "System Prompt",
                "value": "This is a question answering system",

@ -979,7 +544,9 @@ class FullDecomposeQAPipeline(FullQAPipeline):
            )

        # show the evidence
        with_citation, without_citation = self.prepare_citations(answer, docs)
        with_citation, without_citation = self.answering_pipeline.prepare_citations(
            answer, docs
        )
        if not with_citation and not without_citation:
            yield Document(channel="info", content="<h5><b>No evidence found.</b></h5>")
        else:

@ -999,13 +566,7 @@ class FullDecomposeQAPipeline(FullQAPipeline):
        return user_settings

    @classmethod
    def get_pipeline(cls, settings, states, retrievers):
        """Get the reasoning pipeline

        Args:
            settings: the settings for the pipeline
            retrievers: the retrievers to use
        """
    def prepare_pipeline_instance(cls, settings, retrievers):
        prefix = f"reasoning.options.{cls.get_info()['id']}"
        pipeline = cls(
            retrievers=retrievers,

@ -1013,31 +574,6 @@ class FullDecomposeQAPipeline(FullQAPipeline):
                prompt_template=settings.get(f"{prefix}.decompose_prompt")
            ),
        )

        llm_name = settings.get(f"{prefix}.llm", None)
        llm = llms.get(llm_name, llms.get_default())

        # answering pipeline configuration
        answer_pipeline = pipeline.answering_pipeline
        answer_pipeline.llm = llm
        answer_pipeline.citation_pipeline.llm = llm
        answer_pipeline.n_last_interactions = settings[f"{prefix}.n_last_interactions"]
        answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"]
        answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"]
        answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"]
        answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get(
            settings["reasoning.lang"], "English"
        )

        pipeline.add_query_context.llm = llm
        pipeline.add_query_context.n_last_interactions = settings[
            f"{prefix}.n_last_interactions"
        ]

        pipeline.trigger_context = settings[f"{prefix}.trigger_context"]
        pipeline.use_rewrite = states.get("app", {}).get("regen", False)
        if pipeline.rewrite_pipeline:
            pipeline.rewrite_pipeline.llm = llm
        return pipeline

    @classmethod
@ -40,7 +40,10 @@ class Render:
    def collapsible(header, content, open: bool = False) -> str:
        """Render an HTML friendly collapsible section"""
        o = " open" if open else ""
        return f"<details{o}><summary>{header}</summary>{content}</details><br>"
        return (
            f"<details class='evidence' {o}><summary>"
            f"{header}</summary>{content}</details><br>"
        )

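    # Illustrative only (not part of this commit): the call shape is unchanged, e.g.
    #
    #   Render.collapsible(header="<b>Evidence</b>", content="<p>...</p>", open=True)
    #
    # the difference is that the rendered <details> element now carries the
    # class='evidence' attribute, so evidence panels can be targeted from CSS/JS.
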
    @staticmethod
    def table(text: str) -> str:

@ -103,9 +106,10 @@ class Render:
        """ # noqa

    @staticmethod
    def highlight(text: str) -> str:
    def highlight(text: str, elem_id: str | None = None) -> str:
        """Highlight text"""
        return f"<mark>{text}</mark>"
        id_text = f" id='mark-{elem_id}'" if elem_id else ""
        return f"<mark{id_text}>{text}</mark>"

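    # Illustrative only (not part of this commit): the optional elem_id lets the
    # inline-citation UI anchor a specific highlight, e.g.
    #
    #   Render.highlight("net revenue grew 12%", elem_id="3")
    #   # -> "<mark id='mark-3'>net revenue grew 12%</mark>"
    #
    #   Render.highlight("net revenue grew 12%")
    #   # -> "<mark>net revenue grew 12%</mark>"  (unchanged behaviour)
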
    @staticmethod
    def image(url: str, text: str = "") -> str: