From 7e34e4343b5903da93cf37c35bc26233c56f4930 Mon Sep 17 00:00:00 2001 From: "Tuan Anh Nguyen Dang (Tadashi_Cin)" Date: Mon, 25 Nov 2024 12:07:02 +0700 Subject: [PATCH] feat: add inline citation style (#523) bump:minor * feat: add URL quick index, export mindmap, refine UI & animation * fix: inject multimodal mode from env var * fix: minor update css * feat: add citation inline mode * fix: minor update citation inline pipeline * feat: add citation quick setting * fix: minor update * fix: minor update --- flowsettings.py | 2 +- libs/kotaemon/kotaemon/indices/qa/__init__.py | 2 - .../kotaemon/indices/qa/citation_qa.py | 390 ++++++++++++ .../kotaemon/indices/qa/citation_qa_inline.py | 267 ++++++++ .../kotaemon/indices/qa/format_context.py | 114 ++++ .../kotaemon/indices/qa/text_based.py | 63 -- libs/kotaemon/kotaemon/indices/qa/utils.py | 53 ++ libs/kotaemon/kotaemon/indices/vectorindex.py | 8 - .../kotaemon/llms/prompts/template.py | 5 +- libs/ktem/ktem/assets/css/main.css | 67 +- libs/ktem/ktem/assets/js/main.js | 22 + libs/ktem/ktem/db/base_models.py | 4 +- libs/ktem/ktem/index/file/pipelines.py | 3 + libs/ktem/ktem/index/file/ui.py | 83 ++- libs/ktem/ktem/pages/chat/__init__.py | 142 ++++- libs/ktem/ktem/pages/chat/chat_panel.py | 1 + libs/ktem/ktem/reasoning/simple.py | 588 ++---------------- libs/ktem/ktem/utils/render.py | 10 +- 18 files changed, 1173 insertions(+), 651 deletions(-) create mode 100644 libs/kotaemon/kotaemon/indices/qa/citation_qa.py create mode 100644 libs/kotaemon/kotaemon/indices/qa/citation_qa_inline.py create mode 100644 libs/kotaemon/kotaemon/indices/qa/format_context.py delete mode 100644 libs/kotaemon/kotaemon/indices/qa/text_based.py create mode 100644 libs/kotaemon/kotaemon/indices/qa/utils.py diff --git a/flowsettings.py b/flowsettings.py index c4bb74e..b8d4c35 100644 --- a/flowsettings.py +++ b/flowsettings.py @@ -255,7 +255,7 @@ KH_REASONINGS = [ "ktem.reasoning.react.ReactAgentPipeline", "ktem.reasoning.rewoo.RewooAgentPipeline", ] -KH_REASONINGS_USE_MULTIMODAL = False +KH_REASONINGS_USE_MULTIMODAL = config("USE_MULTIMODAL", default=False, cast=bool) KH_VLM_ENDPOINT = "{0}/openai/deployments/{1}/chat/completions?api-version={2}".format( config("AZURE_OPENAI_ENDPOINT", default=""), config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"), diff --git a/libs/kotaemon/kotaemon/indices/qa/__init__.py b/libs/kotaemon/kotaemon/indices/qa/__init__.py index 03a185f..d1a6a99 100644 --- a/libs/kotaemon/kotaemon/indices/qa/__init__.py +++ b/libs/kotaemon/kotaemon/indices/qa/__init__.py @@ -1,7 +1,5 @@ from .citation import CitationPipeline -from .text_based import CitationQAPipeline __all__ = [ "CitationPipeline", - "CitationQAPipeline", ] diff --git a/libs/kotaemon/kotaemon/indices/qa/citation_qa.py b/libs/kotaemon/kotaemon/indices/qa/citation_qa.py new file mode 100644 index 0000000..0fda8c7 --- /dev/null +++ b/libs/kotaemon/kotaemon/indices/qa/citation_qa.py @@ -0,0 +1,390 @@ +import threading +from collections import defaultdict +from typing import Generator + +import numpy as np +from theflow.settings import settings as flowsettings + +from kotaemon.base import ( + AIMessage, + BaseComponent, + Document, + HumanMessage, + Node, + SystemMessage, +) +from kotaemon.llms import ChatLLM, PromptTemplate + +from .citation import CitationPipeline +from .format_context import ( + EVIDENCE_MODE_FIGURE, + EVIDENCE_MODE_TABLE, + EVIDENCE_MODE_TEXT, +) +from .utils import find_text + +try: + from ktem.llms.manager import llms + from ktem.reasoning.prompt_optimization.mindmap 
import CreateMindmapPipeline + from ktem.utils.render import Render +except ImportError: + raise ImportError("Please install `ktem` to use this component") + +MAX_IMAGES = 10 +CITATION_TIMEOUT = 5.0 +CONTEXT_RELEVANT_WARNING_SCORE = 0.7 + +DEFAULT_QA_TEXT_PROMPT = ( + "Use the following pieces of context to answer the question at the end in detail with clear explanation. " # noqa: E501 + "If you don't know the answer, just say that you don't know, don't try to " + "make up an answer. Give answer in " + "{lang}.\n\n" + "{context}\n" + "Question: {question}\n" + "Helpful Answer:" +) + +DEFAULT_QA_TABLE_PROMPT = ( + "Use the given context: texts, tables, and figures below to answer the question, " + "then provide answer with clear explanation." + "If you don't know the answer, just say that you don't know, " + "don't try to make up an answer. Give answer in {lang}.\n\n" + "Context:\n" + "{context}\n" + "Question: {question}\n" + "Helpful Answer:" +) # noqa + +DEFAULT_QA_CHATBOT_PROMPT = ( + "Pick the most suitable chatbot scenarios to answer the question at the end, " + "output the provided answer text. If you don't know the answer, " + "just say that you don't know. Keep the answer as concise as possible. " + "Give answer in {lang}.\n\n" + "Context:\n" + "{context}\n" + "Question: {question}\n" + "Answer:" +) # noqa + +DEFAULT_QA_FIGURE_PROMPT = ( + "Use the given context: texts, tables, and figures below to answer the question. " + "If you don't know the answer, just say that you don't know. " + "Give answer in {lang}.\n\n" + "Context: \n" + "{context}\n" + "Question: {question}\n" + "Answer: " +) # noqa + + +class AnswerWithContextPipeline(BaseComponent): + """Answer the question based on the evidence + + Args: + llm: the language model to generate the answer + citation_pipeline: generates citation from the evidence + qa_template: the prompt template for LLM to generate answer (refer to + evidence_mode) + qa_table_template: the prompt template for LLM to generate answer for table + (refer to evidence_mode) + qa_chatbot_template: the prompt template for LLM to generate answer for + pre-made scenarios (refer to evidence_mode) + lang: the language of the answer. 
Currently support English and Japanese + """ + + llm: ChatLLM = Node(default_callback=lambda _: llms.get_default()) + vlm_endpoint: str = getattr(flowsettings, "KH_VLM_ENDPOINT", "") + use_multimodal: bool = getattr(flowsettings, "KH_REASONINGS_USE_MULTIMODAL", True) + citation_pipeline: CitationPipeline = Node( + default_callback=lambda _: CitationPipeline(llm=llms.get_default()) + ) + create_mindmap_pipeline: CreateMindmapPipeline = Node( + default_callback=lambda _: CreateMindmapPipeline(llm=llms.get_default()) + ) + + qa_template: str = DEFAULT_QA_TEXT_PROMPT + qa_table_template: str = DEFAULT_QA_TABLE_PROMPT + qa_chatbot_template: str = DEFAULT_QA_CHATBOT_PROMPT + qa_figure_template: str = DEFAULT_QA_FIGURE_PROMPT + + enable_citation: bool = False + enable_mindmap: bool = False + enable_citation_viz: bool = False + + system_prompt: str = "" + lang: str = "English" # support English and Japanese + n_last_interactions: int = 5 + + def get_prompt(self, question, evidence, evidence_mode: int): + """Prepare the prompt and other information for LLM""" + if evidence_mode == EVIDENCE_MODE_TEXT: + prompt_template = PromptTemplate(self.qa_template) + elif evidence_mode == EVIDENCE_MODE_TABLE: + prompt_template = PromptTemplate(self.qa_table_template) + elif evidence_mode == EVIDENCE_MODE_FIGURE: + if self.use_multimodal: + prompt_template = PromptTemplate(self.qa_figure_template) + else: + prompt_template = PromptTemplate(self.qa_template) + else: + prompt_template = PromptTemplate(self.qa_chatbot_template) + + prompt = prompt_template.populate( + context=evidence, + question=question, + lang=self.lang, + ) + + return prompt, evidence + + def run( + self, question: str, evidence: str, evidence_mode: int = 0, **kwargs + ) -> Document: + return self.invoke(question, evidence, evidence_mode, **kwargs) + + def invoke( + self, + question: str, + evidence: str, + evidence_mode: int = 0, + images: list[str] = [], + **kwargs, + ) -> Document: + raise NotImplementedError + + async def ainvoke( # type: ignore + self, + question: str, + evidence: str, + evidence_mode: int = 0, + images: list[str] = [], + **kwargs, + ) -> Document: + """Answer the question based on the evidence + + In addition to the question and the evidence, this method also take into + account evidence_mode. The evidence_mode tells which kind of evidence is. + The kind of evidence affects: + 1. How the evidence is represented. + 2. The prompt to generate the answer. + + By default, the evidence_mode is 0, which means the evidence is plain text with + no particular semantic representation. The evidence_mode can be: + 1. "table": There will be HTML markup telling that there is a table + within the evidence. + 2. "chatbot": There will be HTML markup telling that there is a chatbot. + This chatbot is a scenario, extracted from an Excel file, where each + row corresponds to an interaction. 
+ + Args: + question: the original question posed by user + evidence: the text that contain relevant information to answer the question + (determined by retrieval pipeline) + evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot + """ + raise NotImplementedError + + def stream( # type: ignore + self, + question: str, + evidence: str, + evidence_mode: int = 0, + images: list[str] = [], + **kwargs, + ) -> Generator[Document, None, Document]: + history = kwargs.get("history", []) + print(f"Got {len(images)} images") + # check if evidence exists, use QA prompt + if evidence: + prompt, evidence = self.get_prompt(question, evidence, evidence_mode) + else: + prompt = question + + # retrieve the citation + citation = None + mindmap = None + + def citation_call(): + nonlocal citation + citation = self.citation_pipeline(context=evidence, question=question) + + def mindmap_call(): + nonlocal mindmap + mindmap = self.create_mindmap_pipeline(context=evidence, question=question) + + citation_thread = None + mindmap_thread = None + + # execute function call in thread + if evidence: + if self.enable_citation: + citation_thread = threading.Thread(target=citation_call) + citation_thread.start() + + if self.enable_mindmap: + mindmap_thread = threading.Thread(target=mindmap_call) + mindmap_thread.start() + + output = "" + logprobs = [] + + messages = [] + if self.system_prompt: + messages.append(SystemMessage(content=self.system_prompt)) + + for human, ai in history[-self.n_last_interactions :]: + messages.append(HumanMessage(content=human)) + messages.append(AIMessage(content=ai)) + + if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE: + # create image message: + messages.append( + HumanMessage( + content=[ + {"type": "text", "text": prompt}, + ] + + [ + { + "type": "image_url", + "image_url": {"url": image}, + } + for image in images[:MAX_IMAGES] + ], + ) + ) + else: + # append main prompt + messages.append(HumanMessage(content=prompt)) + + try: + # try streaming first + print("Trying LLM streaming") + for out_msg in self.llm.stream(messages): + output += out_msg.text + logprobs += out_msg.logprobs + yield Document(channel="chat", content=out_msg.text) + except NotImplementedError: + print("Streaming is not supported, falling back to normal processing") + output = self.llm(messages).text + yield Document(channel="chat", content=output) + + if logprobs: + qa_score = np.exp(np.average(logprobs)) + else: + qa_score = None + + if citation_thread: + citation_thread.join(timeout=CITATION_TIMEOUT) + if mindmap_thread: + mindmap_thread.join(timeout=CITATION_TIMEOUT) + + answer = Document( + text=output, + metadata={ + "citation_viz": self.enable_citation_viz, + "mindmap": mindmap, + "citation": citation, + "qa_score": qa_score, + }, + ) + + return answer + + def match_evidence_with_context(self, answer, docs) -> dict[str, list[dict]]: + """Match the evidence with the context""" + spans: dict[str, list[dict]] = defaultdict(list) + + if not answer.metadata["citation"]: + return spans + + evidences = answer.metadata["citation"].evidences + for quote in evidences: + matched_excerpts = [] + for doc in docs: + matches = find_text(quote, doc.text) + + for start, end in matches: + if "|" not in doc.text[start:end]: + spans[doc.doc_id].append( + { + "start": start, + "end": end, + } + ) + matched_excerpts.append(doc.text[start:end]) + + # print("Matched citation:", quote, matched_excerpts), + return spans + + def prepare_citations(self, answer, docs) -> tuple[list[Document], 
list[Document]]: + """Prepare the citations to show on the UI""" + with_citation, without_citation = [], [] + has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs) + + spans = self.match_evidence_with_context(answer, docs) + id2docs = {doc.doc_id: doc for doc in docs} + not_detected = set(id2docs.keys()) - set(spans.keys()) + + # render highlight spans + for _id, ss in spans.items(): + if not ss: + not_detected.add(_id) + continue + cur_doc = id2docs[_id] + highlight_text = "" + + ss = sorted(ss, key=lambda x: x["start"]) + text = cur_doc.text[: ss[0]["start"]] + for idx, span in enumerate(ss): + to_highlight = cur_doc.text[span["start"] : span["end"]] + if len(to_highlight) > len(highlight_text): + highlight_text = to_highlight + + span_idx = span.get("idx", None) + if span_idx is not None: + to_highlight = f"【{span_idx + 1}】" + to_highlight + + text += Render.highlight( + to_highlight, + elem_id=str(span_idx + 1) if span_idx is not None else None, + ) + print(text) + if idx < len(ss) - 1: + text += cur_doc.text[span["end"] : ss[idx + 1]["start"]] + text += cur_doc.text[ss[-1]["end"] :] + # add to display list + with_citation.append( + Document( + channel="info", + content=Render.collapsible_with_header_score( + cur_doc, + override_text=text, + highlight_text=highlight_text, + open_collapsible=True, + ), + ) + ) + + print("Got {} cited docs".format(len(with_citation))) + + sorted_not_detected_items_with_scores = [ + (id_, id2docs[id_].metadata.get("llm_trulens_score", 0.0)) + for id_ in not_detected + ] + sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True) + + for id_, _ in sorted_not_detected_items_with_scores: + doc = id2docs[id_] + doc_score = doc.metadata.get("llm_trulens_score", 0.0) + is_open = not has_llm_score or ( + doc_score > CONTEXT_RELEVANT_WARNING_SCORE and len(with_citation) == 0 + ) + without_citation.append( + Document( + channel="info", + content=Render.collapsible_with_header_score( + doc, open_collapsible=is_open + ), + ) + ) + return with_citation, without_citation diff --git a/libs/kotaemon/kotaemon/indices/qa/citation_qa_inline.py b/libs/kotaemon/kotaemon/indices/qa/citation_qa_inline.py new file mode 100644 index 0000000..414e4e2 --- /dev/null +++ b/libs/kotaemon/kotaemon/indices/qa/citation_qa_inline.py @@ -0,0 +1,267 @@ +import re +import threading +from collections import defaultdict +from typing import Generator + +import numpy as np + +from kotaemon.base import AIMessage, Document, HumanMessage, SystemMessage +from kotaemon.llms import PromptTemplate + +from .citation import CiteEvidence +from .citation_qa import CITATION_TIMEOUT, MAX_IMAGES, AnswerWithContextPipeline +from .format_context import EVIDENCE_MODE_FIGURE +from .utils import find_start_end_phrase + +DEFAULT_QA_CITATION_PROMPT = """ +Use the following pieces of context to answer the question at the end. +Provide DETAILED ansswer with clear explanation. +Format answer with easy to follow bullets / paragraphs. +If you don't know the answer, just say that you don't know, don't try to make up an answer. +Use the same language as the question to response. 
+ +CONTEXT: +---- +{context} +---- + +Answer using this format: +CITATION LIST + +// the index in this array +CITATION【number】 + +// output 2 phrase to mark start and end of the relevant span +// each has ~ 6 words +// MUST COPY EXACTLY from the CONTEXT +// NO CHANGE or REPHRASE +// RELEVANT_SPAN_FROM_CONTEXT +START_PHRASE: string +END_PHRASE: string + +// When you answer, ensure to add citations from the documents +// in the CONTEXT with a number that corresponds to the answersInText array. +// (in the form [number]) +// Try to include the number after each facts / statements you make. +// You can create as many citations as you need. +FINAL ANSWER +string + +STRICTLY FOLLOW THIS EXAMPLE: +CITATION LIST + +CITATION【1】 + +START_PHRASE: Known as fixed-size chunking , the traditional +END_PHRASE: not degrade the final retrieval performance. + +CITATION【2】 + +START_PHRASE: Fixed-size Chunker This is our baseline chunker +END_PHRASE: this shows good retrieval quality. + +FINAL ANSWER +An alternative to semantic chunking is fixed-size chunking. This traditional method involves splitting documents into chunks of a predetermined or user-specified size, regardless of semantic content, which is computationally efficient【1】. However, it may result in the fragmentation of semantically related content, thereby potentially degrading retrieval performance【2】. + +QUESTION: {question}\n +ANSWER: +""" # noqa + + +class AnswerWithInlineCitation(AnswerWithContextPipeline): + """Answer the question based on the evidence with inline citation""" + + qa_citation_template: str = DEFAULT_QA_CITATION_PROMPT + + def get_prompt(self, question, evidence, evidence_mode: int): + """Prepare the prompt and other information for LLM""" + prompt_template = PromptTemplate(self.qa_citation_template) + + prompt = prompt_template.populate( + context=evidence, + question=question, + safe=False, + ) + + return prompt, evidence + + def answer_to_citations(self, answer): + evidences = [] + lines = answer.split("\n") + for line in lines: + for keyword in ["START_PHRASE:", "END_PHRASE:"]: + if line.startswith(keyword): + evidences.append(line[len(keyword) :].strip()) + + return CiteEvidence(evidences=evidences) + + def replace_citation_with_link(self, answer: str): + # Define the regex pattern to match 【number】 + pattern = r"【\d+】" + matches = re.finditer(pattern, answer) + + matched_citations = set() + for match in matches: + citation = match.group() + matched_citations.add(citation) + + for citation in matched_citations: + print("Found citation:", citation) + answer = answer.replace( + citation, + ( + "{citation}" + ), + ) + + print("Replaced answer:", answer) + return answer + + def stream( # type: ignore + self, + question: str, + evidence: str, + evidence_mode: int = 0, + images: list[str] = [], + **kwargs, + ) -> Generator[Document, None, Document]: + history = kwargs.get("history", []) + print(f"Got {len(images)} images") + # check if evidence exists, use QA prompt + if evidence: + prompt, evidence = self.get_prompt(question, evidence, evidence_mode) + else: + prompt = question + + output = "" + logprobs = [] + + citation = None + mindmap = None + + def mindmap_call(): + nonlocal mindmap + mindmap = self.create_mindmap_pipeline(context=evidence, question=question) + + mindmap_thread = None + + # execute function call in thread + if evidence: + if self.enable_mindmap: + mindmap_thread = threading.Thread(target=mindmap_call) + mindmap_thread.start() + + messages = [] + if self.system_prompt: + 
messages.append(SystemMessage(content=self.system_prompt)) + + for human, ai in history[-self.n_last_interactions :]: + messages.append(HumanMessage(content=human)) + messages.append(AIMessage(content=ai)) + + if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE: + # create image message: + messages.append( + HumanMessage( + content=[ + {"type": "text", "text": prompt}, + ] + + [ + { + "type": "image_url", + "image_url": {"url": image}, + } + for image in images[:MAX_IMAGES] + ], + ) + ) + else: + # append main prompt + messages.append(HumanMessage(content=prompt)) + + START_ANSWER = "FINAL ANSWER" + start_of_answer = True + final_answer = "" + + try: + # try streaming first + print("Trying LLM streaming") + for out_msg in self.llm.stream(messages): + if START_ANSWER in output: + final_answer += ( + out_msg.text.lstrip() if start_of_answer else out_msg.text + ) + start_of_answer = False + yield Document(channel="chat", content=out_msg.text) + + output += out_msg.text + logprobs += out_msg.logprobs + except NotImplementedError: + print("Streaming is not supported, falling back to normal processing") + output = self.llm(messages).text + yield Document(channel="chat", content=output) + + if logprobs: + qa_score = np.exp(np.average(logprobs)) + else: + qa_score = None + + citation = self.answer_to_citations(output) + + if mindmap_thread: + mindmap_thread.join(timeout=CITATION_TIMEOUT) + + # convert citation to link + answer = Document( + text=final_answer, + metadata={ + "citation_viz": self.enable_citation_viz, + "mindmap": mindmap, + "citation": citation, + "qa_score": qa_score, + }, + ) + + # yield the final answer + final_answer = self.replace_citation_with_link(final_answer) + yield Document(channel="chat", content=None) + yield Document(channel="chat", content=final_answer) + + return answer + + def match_evidence_with_context(self, answer, docs) -> dict[str, list[dict]]: + """Match the evidence with the context""" + spans: dict[str, list[dict]] = defaultdict(list) + + if not answer.metadata["citation"]: + return spans + + evidences = answer.metadata["citation"].evidences + + for start_idx in range(0, len(evidences), 2): + start_phrase, end_phrase = evidences[start_idx : start_idx + 2] + best_match = None + best_match_length = 0 + best_match_doc_idx = None + + for doc in docs: + match, match_length = find_start_end_phrase( + start_phrase, end_phrase, doc.text + ) + if best_match is None or ( + match is not None and match_length > best_match_length + ): + best_match = match + best_match_length = match_length + best_match_doc_idx = doc.doc_id + + if best_match is not None and best_match_doc_idx is not None: + spans[best_match_doc_idx].append( + { + "start": best_match[0], + "end": best_match[1], + "idx": start_idx // 2, # implicitly set from the start_idx + } + ) + return spans diff --git a/libs/kotaemon/kotaemon/indices/qa/format_context.py b/libs/kotaemon/kotaemon/indices/qa/format_context.py new file mode 100644 index 0000000..4727b1c --- /dev/null +++ b/libs/kotaemon/kotaemon/indices/qa/format_context.py @@ -0,0 +1,114 @@ +import html +from functools import partial + +import tiktoken + +from kotaemon.base import BaseComponent, Document, RetrievedDocument +from kotaemon.indices.splitters import TokenSplitter + +EVIDENCE_MODE_TEXT = 0 +EVIDENCE_MODE_TABLE = 1 +EVIDENCE_MODE_CHATBOT = 2 +EVIDENCE_MODE_FIGURE = 3 + + +class PrepareEvidencePipeline(BaseComponent): + """Prepare the evidence text from the list of retrieved documents + + This step usually happens after 
`DocumentRetrievalPipeline`. + + Args: + trim_func: a callback function or a BaseComponent, that splits a large + chunk of text into smaller ones. The first one will be retained. + """ + + max_context_length: int = 32000 + trim_func: TokenSplitter | None = None + + def run(self, docs: list[RetrievedDocument]) -> Document: + evidence = "" + images = [] + table_found = 0 + evidence_modes = [] + + evidence_trim_func = ( + self.trim_func + if self.trim_func + else TokenSplitter( + chunk_size=self.max_context_length, + chunk_overlap=0, + separator=" ", + tokenizer=partial( + tiktoken.encoding_for_model("gpt-3.5-turbo").encode, + allowed_special=set(), + disallowed_special="all", + ), + ) + ) + + for _, retrieved_item in enumerate(docs): + retrieved_content = "" + page = retrieved_item.metadata.get("page_label", None) + source = filename = retrieved_item.metadata.get("file_name", "-") + if page: + source += f" (Page {page})" + if retrieved_item.metadata.get("type", "") == "table": + evidence_modes.append(EVIDENCE_MODE_TABLE) + if table_found < 5: + retrieved_content = retrieved_item.metadata.get( + "table_origin", retrieved_item.text + ) + if retrieved_content not in evidence: + table_found += 1 + evidence += ( + f"
<br><b>Table from {source}</b>\n"
+                            + retrieved_content
+                            + "\n<br>"
+                        )
+            elif retrieved_item.metadata.get("type", "") == "chatbot":
+                evidence_modes.append(EVIDENCE_MODE_CHATBOT)
+                retrieved_content = retrieved_item.metadata["window"]
+                evidence += (
+                    f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n"
+                    + retrieved_content
+                    + "\n<br>"
+                )
+            elif retrieved_item.metadata.get("type", "") == "image":
+                evidence_modes.append(EVIDENCE_MODE_FIGURE)
+                retrieved_content = retrieved_item.metadata.get("image_origin", "")
+                retrieved_caption = html.escape(retrieved_item.get_content())
+                evidence += (
+                    f"<br><b>Figure from {source}</b>\n"
+                    + f"<img src='{retrieved_content}' alt='{retrieved_caption}'/>"
+                    + "\n<br>"
+                )
+                images.append(retrieved_content)
+            else:
+                if "window" in retrieved_item.metadata:
+                    retrieved_content = retrieved_item.metadata["window"]
+                else:
+                    retrieved_content = retrieved_item.text
+                retrieved_content = retrieved_content.replace("\n", " ")
+                if retrieved_content not in evidence:
+                    evidence += (
+                        f"<br><b>Content from {source}: </b> "
+                        + retrieved_content
+                        + " \n<br>
" + ) + + # resolve evidence mode + evidence_mode = EVIDENCE_MODE_TEXT + if EVIDENCE_MODE_FIGURE in evidence_modes: + evidence_mode = EVIDENCE_MODE_FIGURE + elif EVIDENCE_MODE_TABLE in evidence_modes: + evidence_mode = EVIDENCE_MODE_TABLE + + # trim context by trim_len + print("len (original)", len(evidence)) + if evidence: + texts = evidence_trim_func([Document(text=evidence)]) + evidence = texts[0].text + print("len (trimmed)", len(evidence)) + + return Document(content=(evidence_mode, evidence, images)) diff --git a/libs/kotaemon/kotaemon/indices/qa/text_based.py b/libs/kotaemon/kotaemon/indices/qa/text_based.py deleted file mode 100644 index e0b49be..0000000 --- a/libs/kotaemon/kotaemon/indices/qa/text_based.py +++ /dev/null @@ -1,63 +0,0 @@ -import os - -from kotaemon.base import BaseComponent, Document, Node, RetrievedDocument -from kotaemon.llms import BaseLLM, LCAzureChatOpenAI, PromptTemplate - -from .citation import CitationPipeline - - -class CitationQAPipeline(BaseComponent): - """Answering question from a text corpus with citation""" - - qa_prompt_template: PromptTemplate = PromptTemplate( - 'Answer the following question: "{question}". ' - "The context is: \n{context}\nAnswer: " - ) - llm: BaseLLM = LCAzureChatOpenAI.withx( - azure_endpoint="https://bleh-dummy.openai.azure.com/", - openai_api_key=os.environ.get("OPENAI_API_KEY", ""), - openai_api_version="2023-07-01-preview", - deployment_name="dummy-q2-16k", - temperature=0, - request_timeout=60, - ) - citation_pipeline: CitationPipeline = Node( - default_callback=lambda self: CitationPipeline(llm=self.llm) - ) - - def _format_doc_text(self, text: str) -> str: - """Format the text of each document""" - return text.replace("\n", " ") - - def _format_retrieved_context(self, documents: list[RetrievedDocument]) -> str: - """Format the texts between all documents""" - matched_texts: list[str] = [ - self._format_doc_text(doc.text) for doc in documents - ] - return "\n\n".join(matched_texts) - - def run( - self, - question: str, - documents: list[RetrievedDocument], - use_citation: bool = False, - **kwargs - ) -> Document: - # retrieve relevant documents as context - context = self._format_retrieved_context(documents) - self.log_progress(".context", context=context) - - # generate the answer - prompt = self.qa_prompt_template.populate( - context=context, - question=question, - ) - self.log_progress(".prompt", prompt=prompt) - answer_text = self.llm(prompt).text - if use_citation: - citation = self.citation_pipeline(context=context, question=question) - else: - citation = None - - answer = Document(text=answer_text, metadata={"citation": citation}) - return answer diff --git a/libs/kotaemon/kotaemon/indices/qa/utils.py b/libs/kotaemon/kotaemon/indices/qa/utils.py new file mode 100644 index 0000000..4b6495a --- /dev/null +++ b/libs/kotaemon/kotaemon/indices/qa/utils.py @@ -0,0 +1,53 @@ +from difflib import SequenceMatcher + + +def find_text(search_span, context, min_length=5): + sentence_list = search_span.split("\n") + context = context.replace("\n", " ") + + matches = [] + # don't search for small text + if len(search_span) > min_length: + for sentence in sentence_list: + match = SequenceMatcher( + None, sentence, context, autojunk=False + ).find_longest_match() + if match.size > max(len(sentence) * 0.35, min_length): + matches.append((match.b, match.b + match.size)) + + return matches + + +def find_start_end_phrase( + start_phrase, end_phrase, context, min_length=5, max_excerpt_length=300 +): + context = context.replace("\n", " ") 
+ + matches = [] + matched_length = 0 + for sentence in [start_phrase, end_phrase]: + match = SequenceMatcher( + None, sentence, context, autojunk=False + ).find_longest_match() + if match.size > max(len(sentence) * 0.35, min_length): + matches.append((match.b, match.b + match.size)) + matched_length += match.size + + # check if second match is before the first match + if len(matches) == 2 and matches[1][0] < matches[0][0]: + # if so, keep only the first match + matches = [matches[0]] + + if matches: + start_idx = min(start for start, _ in matches) + end_idx = max(end for _, end in matches) + + # check if the excerpt is too long + if end_idx - start_idx > max_excerpt_length: + end_idx = start_idx + max_excerpt_length + + final_match = (start_idx, end_idx) + else: + final_match = None + + return final_match, matched_length diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py index e8f79a6..5bf77c3 100644 --- a/libs/kotaemon/kotaemon/indices/vectorindex.py +++ b/libs/kotaemon/kotaemon/indices/vectorindex.py @@ -42,14 +42,6 @@ class VectorIndexing(BaseIndexing): **kwargs, ) - def to_qa_pipeline(self, *args, **kwargs): - from .qa import CitationQAPipeline - - return TextVectorQA( - retrieving_pipeline=self.to_retrieval_pipeline(**kwargs), - qa_pipeline=CitationQAPipeline(**kwargs), - ) - def write_chunk_to_file(self, docs: list[Document]): # save the chunks content into markdown format if self.cache_dir: diff --git a/libs/kotaemon/kotaemon/llms/prompts/template.py b/libs/kotaemon/kotaemon/llms/prompts/template.py index aa758be..b189045 100644 --- a/libs/kotaemon/kotaemon/llms/prompts/template.py +++ b/libs/kotaemon/kotaemon/llms/prompts/template.py @@ -72,7 +72,7 @@ class PromptTemplate: UserWarning, ) - def populate(self, **kwargs) -> str: + def populate(self, safe=True, **kwargs) -> str: """ Strictly populate the template with the given keyword arguments. @@ -86,7 +86,8 @@ class PromptTemplate: Raises: ValueError: If an unknown placeholder is provided. """ - self.check_missing_kwargs(**kwargs) + if safe: + self.check_missing_kwargs(**kwargs) return self.partial_populate(**kwargs) diff --git a/libs/ktem/ktem/assets/css/main.css b/libs/ktem/ktem/assets/css/main.css index 6336b02..ae6fe7d 100644 --- a/libs/ktem/ktem/assets/css/main.css +++ b/libs/ktem/ktem/assets/css/main.css @@ -97,6 +97,11 @@ button.selected { #chat-info-panel { max-height: var(--main-area-height) !important; overflow: auto !important; + transition: all 0.5s; +} + +body.dark #chat-info-panel figure>img{ + filter: invert(100%); } #conv-settings-panel { @@ -199,11 +204,26 @@ mark { right: 15px; } -/* #new-conv-button > img { - position: relative; - top: 0px; - right: -50%; -} */ +#use-mindmap-checkbox { + position: absolute; + width: 110px; + top: 10px; + right: 25px; +} + +#quick-url textarea { + resize: none; + background: transparent; + margin-top: 0px; +} + +#quick-url textarea::placeholder { + text-align: center; +} + +#quick-file { + height: 110px; +} span.icon { color: #cecece; @@ -225,11 +245,6 @@ span.icon { overflow: unset !important; } -/*body {*/ -/* margin: 0;*/ -/* font-family: Arial, sans-serif;*/ -/*}*/ - pdfjs-viewer-element { height: 100vh; height: 100dvh; @@ -280,8 +295,7 @@ pdfjs-viewer-element { overflow: auto; } -/** Switch - -------------------------------------*/ +/* Switch checkbox styles */ #is-public-checkbox { position: relative; @@ -293,10 +307,6 @@ pdfjs-viewer-element { opacity: 0; } -/** - * 1. 
Adjust this to size - */ - .switch { display: inline-block; /* 1 */ @@ -330,3 +340,28 @@ pdfjs-viewer-element { .switch:has(> input:checked) { background: #0c895f; } + +/* Bot animation */ + +.message.bot { + animation: fadein 1.5s ease-in-out forwards; +} + +details.evidence { + animation: fadein 0.5s ease-in-out forwards; +} + +@keyframes fadein { + 0% { + opacity: 0; + } + + 100% { + opacity: 100%; + } +} + +.message a.citation { + color: #10b981; + text-decoration: none; +} diff --git a/libs/ktem/ktem/assets/js/main.js b/libs/ktem/ktem/assets/js/main.js index d2d0b81..30c4067 100644 --- a/libs/ktem/ktem/assets/js/main.js +++ b/libs/ktem/ktem/assets/js/main.js @@ -16,6 +16,11 @@ function run() { let chat_info_panel = document.getElementById("info-expand"); chat_info_panel.insertBefore(info_expand_button, chat_info_panel.childNodes[2]); + // move use mind-map checkbox + let mindmap_checkbox = document.getElementById("use-mindmap-checkbox"); + let chat_setting_panel = document.getElementById("chat-settings-expand"); + chat_setting_panel.insertBefore(mindmap_checkbox, chat_setting_panel.childNodes[2]); + // create slider toggle const is_public_checkbox = document.getElementById("is-public-checkbox"); const label_element = is_public_checkbox.getElementsByTagName("label")[0]; @@ -49,4 +54,21 @@ function run() { globalThis.removeFromStorage = (key) => { localStorage.removeItem(key) } + + // Function to scroll to given citation with ID + // Sleep function using Promise and setTimeout + function sleep(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); + } + + globalThis.scrollToCitation = async (event) => { + event.preventDefault(); // Prevent the default link behavior + var citationId = event.target.getAttribute('id'); + + await sleep(100); // Sleep for 500 milliseconds + var citation = document.querySelector('mark[id="' + citationId + '"]'); + if (citation) { + citation.scrollIntoView({ behavior: 'smooth' }); + } + } } diff --git a/libs/ktem/ktem/db/base_models.py b/libs/ktem/ktem/db/base_models.py index 0c6004b..7c49705 100644 --- a/libs/ktem/ktem/db/base_models.py +++ b/libs/ktem/ktem/db/base_models.py @@ -25,8 +25,8 @@ class BaseConversation(SQLModel): default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True ) name: str = Field( - default_factory=lambda: datetime.datetime.now(get_localzone()).strftime( - "%Y-%m-%d %H:%M:%S" + default_factory=lambda: "Untitled - {}".format( + datetime.datetime.now(get_localzone()).strftime("%Y-%m-%d %H:%M:%S") ) ) user: int = Field(default=0) # For now we only have one user diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py index 31a48b1..6b0033c 100644 --- a/libs/ktem/ktem/index/file/pipelines.py +++ b/libs/ktem/ktem/index/file/pipelines.py @@ -126,6 +126,9 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever): if doc_ids: flatten_doc_ids = [] for doc_id in doc_ids: + if doc_id is None: + raise ValueError("No document is selected") + if doc_id.startswith("["): flatten_doc_ids.extend(json.loads(doc_id)) else: diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py index e440fbf..7a97aae 100644 --- a/libs/ktem/ktem/index/file/ui.py +++ b/libs/ktem/ktem/index/file/ui.py @@ -22,6 +22,13 @@ from theflow.settings import settings as flowsettings DOWNLOAD_MESSAGE = "Press again to download" MAX_FILENAME_LENGTH = 20 +chat_input_focus_js = """ +function() { + let chatInput = document.querySelector("#chat-input textarea"); + chatInput.focus(); +} +""" + class 
File(gr.File): """Subclass from gr.File to maintain the original filename @@ -666,7 +673,7 @@ class FileIndexPage(BasePage): outputs=self._app.chat_page.quick_file_upload_status, ) .then( - fn=self.index_fn_with_default_loaders, + fn=self.index_fn_file_with_default_loaders, inputs=[ self._app.chat_page.quick_file_upload, gr.State(value=False), @@ -689,6 +696,38 @@ class FileIndexPage(BasePage): for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"): quickUploadedEvent = quickUploadedEvent.then(**event) + quickURLUploadedEvent = ( + self._app.chat_page.quick_urls.submit( + fn=lambda: gr.update( + value="Please wait for the indexing process " + "to complete before adding your question." + ), + outputs=self._app.chat_page.quick_file_upload_status, + ) + .then( + fn=self.index_fn_url_with_default_loaders, + inputs=[ + self._app.chat_page.quick_urls, + gr.State(value=True), + self._app.settings_state, + self._app.user_id, + ], + outputs=self.quick_upload_state, + ) + .success( + fn=lambda: [ + gr.update(value=None), + gr.update(value="select"), + ], + outputs=[ + self._app.chat_page.quick_urls, + self._app.chat_page._indices_input[0], + ], + ) + ) + for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"): + quickURLUploadedEvent = quickURLUploadedEvent.then(**event) + quickUploadedEvent.success( fn=lambda x: x, inputs=self.quick_upload_state, @@ -701,6 +740,30 @@ class FileIndexPage(BasePage): inputs=[self._app.user_id, self.filter], outputs=[self.file_list_state, self.file_list], concurrency_limit=20, + ).then( + fn=lambda: True, + inputs=None, + outputs=None, + js=chat_input_focus_js, + ) + + quickURLUploadedEvent.success( + fn=lambda x: x, + inputs=self.quick_upload_state, + outputs=self._app.chat_page._indices_input[1], + ).then( + fn=lambda: gr.update(value="Indexing completed."), + outputs=self._app.chat_page.quick_file_upload_status, + ).then( + fn=self.list_file, + inputs=[self._app.user_id, self.filter], + outputs=[self.file_list_state, self.file_list], + concurrency_limit=20, + ).then( + fn=lambda: True, + inputs=None, + outputs=None, + js=chat_input_focus_js, ) except Exception as e: @@ -951,7 +1014,7 @@ class FileIndexPage(BasePage): return results - def index_fn_with_default_loaders( + def index_fn_file_with_default_loaders( self, files, reindex: bool, settings, user_id ) -> list["str"]: """Function for quick upload with default loaders @@ -991,6 +1054,22 @@ class FileIndexPage(BasePage): return exist_ids + returned_ids + def index_fn_url_with_default_loaders(self, urls, reindex: bool, settings, user_id): + returned_ids = [] + settings = deepcopy(settings) + settings[f"index.options.{self._index.id}.reader_mode"] = "default" + settings[f"index.options.{self._index.id}.quick_index_mode"] = True + + if urls: + _iter = self.index_fn([], urls, reindex, settings, user_id) + try: + while next(_iter): + pass + except StopIteration as e: + returned_ids = e.value + + return returned_ids + def index_files_from_dir( self, folder_path, reindex, settings, user_id ) -> Generator[tuple[str, str], None, None]: diff --git a/libs/ktem/ktem/pages/chat/__init__.py b/libs/ktem/ktem/pages/chat/__init__.py index e25fdba..55f04b3 100644 --- a/libs/ktem/ktem/pages/chat/__init__.py +++ b/libs/ktem/ktem/pages/chat/__init__.py @@ -40,26 +40,52 @@ function() { links[i].onclick = openModal; } - var mindmap_el = document.getElementById('mindmap'); - if (mindmap_el) { - var output = svgPanZoom(mindmap_el); + // Get all citation links and attach click event + var links = 
document.querySelectorAll("a.citation"); + for (var i = 0; i < links.length; i++) { + links[i].onclick = scrollToCitation; } - var link = document.getElementById("mindmap-toggle"); - if (link) { - link.onclick = function(event) { + var mindmap_el = document.getElementById('mindmap'); + + if (mindmap_el) { + var output = svgPanZoom(mindmap_el); + const svg = mindmap_el.cloneNode(true); + + function on_svg_export(event) { event.preventDefault(); // Prevent the default link behavior - var div = document.getElementById("mindmap-wrapper"); - if (div) { - var currentHeight = div.style.height; - if (currentHeight === '400px') { - var contentHeight = div.scrollHeight; - div.style.height = contentHeight + 'px'; - } else { - div.style.height = '400px' + // convert to a valid XML source + const as_text = new XMLSerializer().serializeToString(svg); + // store in a Blob + const blob = new Blob([as_text], { type: "image/svg+xml" }); + // create an URI pointing to that blob + const url = URL.createObjectURL(blob); + const win = open(url); + // so the Garbage Collector can collect the blob + win.onload = (evt) => URL.revokeObjectURL(url); + } + + var link = document.getElementById("mindmap-toggle"); + if (link) { + link.onclick = function(event) { + event.preventDefault(); // Prevent the default link behavior + var div = document.getElementById("mindmap-wrapper"); + if (div) { + var currentHeight = div.style.height; + if (currentHeight === '400px') { + var contentHeight = div.scrollHeight; + div.style.height = contentHeight + 'px'; + } else { + div.style.height = '400px' + } } - } - }; + }; + } + + var link = document.getElementById("mindmap-export"); + if (link) { + link.addEventListener('click', on_svg_export); + } } return [links.length] @@ -127,6 +153,14 @@ class ChatPage(BasePage): file_count="multiple", container=True, show_label=False, + elem_id="quick-file", + ) + self.quick_urls = gr.Textbox( + placeholder="Or paste URLs here", + lines=1, + container=False, + show_label=False, + elem_id="quick-url", ) self.quick_file_upload_status = gr.Markdown() @@ -136,12 +170,17 @@ class ChatPage(BasePage): self.chat_panel = ChatPanel(self._app) with gr.Row(): - with gr.Accordion(label="Chat settings", open=False): + with gr.Accordion( + label="Chat settings", + elem_id="chat-settings-expand", + open=False, + ): # a quick switch for reasoning type option with gr.Row(): gr.HTML("Reasoning method") gr.HTML("Model") - gr.HTML("Generate mindmap") + gr.HTML("Language") + gr.HTML("Citation") with gr.Row(): reasoning_type_values = [ @@ -165,17 +204,36 @@ class ChatPage(BasePage): container=False, show_label=False, ) - binary_default_choices = [ - (DEFAULT_SETTING, DEFAULT_SETTING), - ("Enable", True), - ("Disable", False), - ] - self.use_mindmap = gr.Dropdown( + self.language = gr.Dropdown( + choices=[ + (DEFAULT_SETTING, DEFAULT_SETTING), + ] + + self._app.default_settings.reasoning.settings[ + "lang" + ].choices, value=DEFAULT_SETTING, - choices=binary_default_choices, container=False, show_label=False, ) + self.citation = gr.Dropdown( + choices=[ + (DEFAULT_SETTING, DEFAULT_SETTING), + ] + + self._app.default_settings.reasoning.options["simple"] + .settings["highlight_citation"] + .choices, + value=DEFAULT_SETTING, + container=False, + show_label=False, + interactive=True, + ) + + self.use_mindmap = gr.State(value=DEFAULT_SETTING) + self.use_mindmap_check = gr.Checkbox( + label="Mindmap (default)", + container=False, + elem_id="use-mindmap-checkbox", + ) with gr.Column( scale=INFO_PANEL_SCALES[False], 
elem_id="chat-info-panel" @@ -235,6 +293,8 @@ class ChatPage(BasePage): self._reasoning_type, self.model_type, self.use_mindmap, + self.citation, + self.language, self.state_chat, self._app.user_id, ] @@ -506,6 +566,12 @@ class ChatPage(BasePage): inputs=[self.reasoning_type], outputs=[self._reasoning_type], ) + self.use_mindmap_check.change( + lambda x: (x, gr.update(label="Mindmap " + ("(on)" if x else "(off)"))), + inputs=[self.use_mindmap_check], + outputs=[self.use_mindmap, self.use_mindmap_check], + show_progress="hidden", + ) self.chat_control.conversation_id.change( lambda: gr.update(visible=False), outputs=self.plot_panel, @@ -722,6 +788,8 @@ class ChatPage(BasePage): session_reasoning_type: str, session_llm: str, session_use_mindmap: bool | str, + session_use_citation: str, + session_language: str, state: dict, user_id: int, *selecteds, @@ -743,6 +811,10 @@ class ChatPage(BasePage): session_reasoning_type, "use mindmap", session_use_mindmap, + "use citation", + session_use_citation, + "language", + session_language, ) print("Session LLM", session_llm) reasoning_mode = ( @@ -766,6 +838,14 @@ class ChatPage(BasePage): if session_use_mindmap not in (DEFAULT_SETTING, None): settings["reasoning.options.simple.create_mindmap"] = session_use_mindmap + if session_use_citation not in (DEFAULT_SETTING, None): + settings[ + "reasoning.options.simple.highlight_citation" + ] = session_use_citation + + if session_language not in (DEFAULT_SETTING, None): + settings["reasoning.lang"] = session_language + # get retrievers retrievers = [] for index in self._app.index_manager.indices: @@ -798,6 +878,8 @@ class ChatPage(BasePage): reasoning_type, llm_type, use_mind_map, + use_citation, + language, state, user_id, *selecteds, @@ -814,7 +896,15 @@ class ChatPage(BasePage): # construct the pipeline pipeline, reasoning_state = self.create_pipeline( - settings, reasoning_type, llm_type, use_mind_map, state, user_id, *selecteds + settings, + reasoning_type, + llm_type, + use_mind_map, + use_citation, + language, + state, + user_id, + *selecteds, ) print("Reasoning state", reasoning_state) pipeline.set_output_queue(queue) diff --git a/libs/ktem/ktem/pages/chat/chat_panel.py b/libs/ktem/ktem/pages/chat/chat_panel.py index a7ec3a2..b83c5d1 100644 --- a/libs/ktem/ktem/pages/chat/chat_panel.py +++ b/libs/ktem/ktem/pages/chat/chat_panel.py @@ -28,6 +28,7 @@ class ChatPanel(BasePage): placeholder="Chat input", container=False, show_label=False, + elem_id="chat-input", ) def submit_msg(self, chat_input, chat_history): diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py index 80bebad..fbd861f 100644 --- a/libs/ktem/ktem/reasoning/simple.py +++ b/libs/ktem/ktem/reasoning/simple.py @@ -1,17 +1,10 @@ -import html import logging import threading -from collections import defaultdict -from difflib import SequenceMatcher -from functools import partial from typing import Generator -import numpy as np -import tiktoken from ktem.embeddings.manager import embedding_models_manager as embeddings from ktem.llms.manager import llms from ktem.reasoning.prompt_optimization import ( - CreateMindmapPipeline, DecomposeQuestionPipeline, RewriteQuestionPipeline, ) @@ -19,7 +12,6 @@ from ktem.utils.plantuml import PlantUML from ktem.utils.render import Render from ktem.utils.visualize_cited import CreateCitationVizPipeline from plotly.io import to_json -from theflow.settings import settings as flowsettings from kotaemon.base import ( AIMessage, @@ -30,399 +22,20 @@ from kotaemon.base import ( 
RetrievedDocument, SystemMessage, ) -from kotaemon.indices.qa.citation import CitationPipeline -from kotaemon.indices.splitters import TokenSplitter -from kotaemon.llms import ChatLLM, PromptTemplate +from kotaemon.indices.qa.citation_qa import ( + CONTEXT_RELEVANT_WARNING_SCORE, + DEFAULT_QA_TEXT_PROMPT, + AnswerWithContextPipeline, +) +from kotaemon.indices.qa.citation_qa_inline import AnswerWithInlineCitation +from kotaemon.indices.qa.format_context import PrepareEvidencePipeline +from kotaemon.llms import ChatLLM from ..utils import SUPPORTED_LANGUAGE_MAP from .base import BaseReasoning logger = logging.getLogger(__name__) -EVIDENCE_MODE_TEXT = 0 -EVIDENCE_MODE_TABLE = 1 -EVIDENCE_MODE_CHATBOT = 2 -EVIDENCE_MODE_FIGURE = 3 -MAX_IMAGES = 10 -CITATION_TIMEOUT = 5.0 - - -def find_text(search_span, context): - sentence_list = search_span.split("\n") - context = context.replace("\n", " ") - - matches = [] - # don't search for small text - if len(search_span) > 5: - for sentence in sentence_list: - match = SequenceMatcher( - None, sentence, context, autojunk=False - ).find_longest_match() - if match.size > max(len(sentence) * 0.35, 5): - matches.append((match.b, match.b + match.size)) - - return matches - - -class PrepareEvidencePipeline(BaseComponent): - """Prepare the evidence text from the list of retrieved documents - - This step usually happens after `DocumentRetrievalPipeline`. - - Args: - trim_func: a callback function or a BaseComponent, that splits a large - chunk of text into smaller ones. The first one will be retained. - """ - - max_context_length: int = 32000 - trim_func: TokenSplitter | None = None - - def run(self, docs: list[RetrievedDocument]) -> Document: - evidence = "" - images = [] - table_found = 0 - evidence_modes = [] - - evidence_trim_func = ( - self.trim_func - if self.trim_func - else TokenSplitter( - chunk_size=self.max_context_length, - chunk_overlap=0, - separator=" ", - tokenizer=partial( - tiktoken.encoding_for_model("gpt-3.5-turbo").encode, - allowed_special=set(), - disallowed_special="all", - ), - ) - ) - - for _id, retrieved_item in enumerate(docs): - retrieved_content = "" - page = retrieved_item.metadata.get("page_label", None) - source = filename = retrieved_item.metadata.get("file_name", "-") - if page: - source += f" (Page {page})" - if retrieved_item.metadata.get("type", "") == "table": - evidence_modes.append(EVIDENCE_MODE_TABLE) - if table_found < 5: - retrieved_content = retrieved_item.metadata.get( - "table_origin", retrieved_item.text - ) - if retrieved_content not in evidence: - table_found += 1 - evidence += ( - f"
<br><b>Table from {source}</b>\n"
-                            + retrieved_content
-                            + "\n<br>"
-                        )
-            elif retrieved_item.metadata.get("type", "") == "chatbot":
-                evidence_modes.append(EVIDENCE_MODE_CHATBOT)
-                retrieved_content = retrieved_item.metadata["window"]
-                evidence += (
-                    f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n"
-                    + retrieved_content
-                    + "\n<br>"
-                )
-            elif retrieved_item.metadata.get("type", "") == "image":
-                evidence_modes.append(EVIDENCE_MODE_FIGURE)
-                retrieved_content = retrieved_item.metadata.get("image_origin", "")
-                retrieved_caption = html.escape(retrieved_item.get_content())
-                evidence += (
-                    f"<br><b>Figure from {source}</b>\n"
-                    + f"<img src='{retrieved_content}' alt='{retrieved_caption}'/>"
-                    + "\n<br>"
-                )
-                images.append(retrieved_content)
-            else:
-                if "window" in retrieved_item.metadata:
-                    retrieved_content = retrieved_item.metadata["window"]
-                else:
-                    retrieved_content = retrieved_item.text
-                retrieved_content = retrieved_content.replace("\n", " ")
-                if retrieved_content not in evidence:
-                    evidence += (
-                        f"<br><b>Content from {source}: </b> "
-                        + retrieved_content
-                        + " \n<br>
" - ) - - # resolve evidence mode - evidence_mode = EVIDENCE_MODE_TEXT - if EVIDENCE_MODE_FIGURE in evidence_modes: - evidence_mode = EVIDENCE_MODE_FIGURE - elif EVIDENCE_MODE_TABLE in evidence_modes: - evidence_mode = EVIDENCE_MODE_TABLE - - # trim context by trim_len - print("len (original)", len(evidence)) - if evidence: - texts = evidence_trim_func([Document(text=evidence)]) - evidence = texts[0].text - print("len (trimmed)", len(evidence)) - - return Document(content=(evidence_mode, evidence, images)) - - -DEFAULT_QA_TEXT_PROMPT = ( - "Use the following pieces of context to answer the question at the end in detail with clear explanation. " # noqa: E501 - "If you don't know the answer, just say that you don't know, don't try to " - "make up an answer. Give answer in " - "{lang}.\n\n" - "{context}\n" - "Question: {question}\n" - "Helpful Answer:" -) - -DEFAULT_QA_TABLE_PROMPT = ( - "Use the given context: texts, tables, and figures below to answer the question, " - "then provide answer with clear explanation." - "If you don't know the answer, just say that you don't know, " - "don't try to make up an answer. Give answer in {lang}.\n\n" - "Context:\n" - "{context}\n" - "Question: {question}\n" - "Helpful Answer:" -) # noqa - -DEFAULT_QA_CHATBOT_PROMPT = ( - "Pick the most suitable chatbot scenarios to answer the question at the end, " - "output the provided answer text. If you don't know the answer, " - "just say that you don't know. Keep the answer as concise as possible. " - "Give answer in {lang}.\n\n" - "Context:\n" - "{context}\n" - "Question: {question}\n" - "Answer:" -) # noqa - -DEFAULT_QA_FIGURE_PROMPT = ( - "Use the given context: texts, tables, and figures below to answer the question. " - "If you don't know the answer, just say that you don't know. " - "Give answer in {lang}.\n\n" - "Context: \n" - "{context}\n" - "Question: {question}\n" - "Answer: " -) # noqa - -CONTEXT_RELEVANT_WARNING_SCORE = 0.7 - - -class AnswerWithContextPipeline(BaseComponent): - """Answer the question based on the evidence - - Args: - llm: the language model to generate the answer - citation_pipeline: generates citation from the evidence - qa_template: the prompt template for LLM to generate answer (refer to - evidence_mode) - qa_table_template: the prompt template for LLM to generate answer for table - (refer to evidence_mode) - qa_chatbot_template: the prompt template for LLM to generate answer for - pre-made scenarios (refer to evidence_mode) - lang: the language of the answer. 
Currently support English and Japanese - """ - - llm: ChatLLM = Node(default_callback=lambda _: llms.get_default()) - vlm_endpoint: str = getattr(flowsettings, "KH_VLM_ENDPOINT", "") - use_multimodal: bool = getattr(flowsettings, "KH_REASONINGS_USE_MULTIMODAL", True) - citation_pipeline: CitationPipeline = Node( - default_callback=lambda _: CitationPipeline(llm=llms.get_default()) - ) - create_mindmap_pipeline: CreateMindmapPipeline = Node( - default_callback=lambda _: CreateMindmapPipeline(llm=llms.get_default()) - ) - - qa_template: str = DEFAULT_QA_TEXT_PROMPT - qa_table_template: str = DEFAULT_QA_TABLE_PROMPT - qa_chatbot_template: str = DEFAULT_QA_CHATBOT_PROMPT - qa_figure_template: str = DEFAULT_QA_FIGURE_PROMPT - - enable_citation: bool = False - enable_mindmap: bool = False - enable_citation_viz: bool = False - - system_prompt: str = "" - lang: str = "English" # support English and Japanese - n_last_interactions: int = 5 - - def get_prompt(self, question, evidence, evidence_mode: int): - """Prepare the prompt and other information for LLM""" - if evidence_mode == EVIDENCE_MODE_TEXT: - prompt_template = PromptTemplate(self.qa_template) - elif evidence_mode == EVIDENCE_MODE_TABLE: - prompt_template = PromptTemplate(self.qa_table_template) - elif evidence_mode == EVIDENCE_MODE_FIGURE: - if self.use_multimodal: - prompt_template = PromptTemplate(self.qa_figure_template) - else: - prompt_template = PromptTemplate(self.qa_template) - else: - prompt_template = PromptTemplate(self.qa_chatbot_template) - - prompt = prompt_template.populate( - context=evidence, - question=question, - lang=self.lang, - ) - - return prompt, evidence - - def run( - self, question: str, evidence: str, evidence_mode: int = 0, **kwargs - ) -> Document: - return self.invoke(question, evidence, evidence_mode, **kwargs) - - def invoke( - self, - question: str, - evidence: str, - evidence_mode: int = 0, - images: list[str] = [], - **kwargs, - ) -> Document: - raise NotImplementedError - - async def ainvoke( # type: ignore - self, - question: str, - evidence: str, - evidence_mode: int = 0, - images: list[str] = [], - **kwargs, - ) -> Document: - """Answer the question based on the evidence - - In addition to the question and the evidence, this method also take into - account evidence_mode. The evidence_mode tells which kind of evidence is. - The kind of evidence affects: - 1. How the evidence is represented. - 2. The prompt to generate the answer. - - By default, the evidence_mode is 0, which means the evidence is plain text with - no particular semantic representation. The evidence_mode can be: - 1. "table": There will be HTML markup telling that there is a table - within the evidence. - 2. "chatbot": There will be HTML markup telling that there is a chatbot. - This chatbot is a scenario, extracted from an Excel file, where each - row corresponds to an interaction. 
- - Args: - question: the original question posed by user - evidence: the text that contain relevant information to answer the question - (determined by retrieval pipeline) - evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot - """ - raise NotImplementedError - - def stream( # type: ignore - self, - question: str, - evidence: str, - evidence_mode: int = 0, - images: list[str] = [], - **kwargs, - ) -> Generator[Document, None, Document]: - history = kwargs.get("history", []) - print(f"Got {len(images)} images") - # check if evidence exists, use QA prompt - if evidence: - prompt, evidence = self.get_prompt(question, evidence, evidence_mode) - else: - prompt = question - - # retrieve the citation - citation = None - mindmap = None - - def citation_call(): - nonlocal citation - citation = self.citation_pipeline(context=evidence, question=question) - - def mindmap_call(): - nonlocal mindmap - mindmap = self.create_mindmap_pipeline(context=evidence, question=question) - - citation_thread = None - mindmap_thread = None - - # execute function call in thread - if evidence: - if self.enable_citation: - citation_thread = threading.Thread(target=citation_call) - citation_thread.start() - - if self.enable_mindmap: - mindmap_thread = threading.Thread(target=mindmap_call) - mindmap_thread.start() - - output = "" - logprobs = [] - - messages = [] - if self.system_prompt: - messages.append(SystemMessage(content=self.system_prompt)) - for human, ai in history[-self.n_last_interactions :]: - messages.append(HumanMessage(content=human)) - messages.append(AIMessage(content=ai)) - - if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE: - # create image message: - messages.append( - HumanMessage( - content=[ - {"type": "text", "text": prompt}, - ] - + [ - { - "type": "image_url", - "image_url": {"url": image}, - } - for image in images[:MAX_IMAGES] - ], - ) - ) - else: - # append main prompt - messages.append(HumanMessage(content=prompt)) - - try: - # try streaming first - print("Trying LLM streaming") - for out_msg in self.llm.stream(messages): - output += out_msg.text - logprobs += out_msg.logprobs - yield Document(channel="chat", content=out_msg.text) - except NotImplementedError: - print("Streaming is not supported, falling back to normal processing") - output = self.llm(messages).text - yield Document(channel="chat", content=output) - - if logprobs: - qa_score = np.exp(np.average(logprobs)) - else: - qa_score = None - - if citation_thread: - citation_thread.join(timeout=CITATION_TIMEOUT) - if mindmap_thread: - mindmap_thread.join(timeout=CITATION_TIMEOUT) - - answer = Document( - text=output, - metadata={ - "citation_viz": self.enable_citation_viz, - "mindmap": mindmap, - "citation": citation, - "qa_score": qa_score, - }, - ) - - return answer - class AddQueryContextPipeline(BaseComponent): @@ -481,7 +94,7 @@ class FullQAPipeline(BaseReasoning): retrievers: list[BaseComponent] evidence_pipeline: PrepareEvidencePipeline = PrepareEvidencePipeline.withx() - answering_pipeline: AnswerWithContextPipeline = AnswerWithContextPipeline.withx() + answering_pipeline: AnswerWithContextPipeline rewrite_pipeline: RewriteQuestionPipeline | None = None create_citation_viz_pipeline: CreateCitationVizPipeline = Node( default_callback=lambda _: CreateCitationVizPipeline( @@ -548,104 +161,35 @@ class FullQAPipeline(BaseReasoning): return docs, info - def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document]]: - """Prepare the citations to show on the UI""" - 
-        with_citation, without_citation = [], []
-        spans = defaultdict(list)
-        has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs)
-
-        if answer.metadata["citation"]:
-            evidences = answer.metadata["citation"].evidences
-            for quote in evidences:
-                matched_excerpts = []
-                for doc in docs:
-                    matches = find_text(quote, doc.text)
-
-                    for start, end in matches:
-                        if "|" not in doc.text[start:end]:
-                            spans[doc.doc_id].append(
-                                {
-                                    "start": start,
-                                    "end": end,
-                                }
-                            )
-                            matched_excerpts.append(doc.text[start:end])
-
-                # print("Matched citation:", quote, matched_excerpts),
-
-        id2docs = {doc.doc_id: doc for doc in docs}
-        not_detected = set(id2docs.keys()) - set(spans.keys())
-
-        # render highlight spans
-        for _id, ss in spans.items():
-            if not ss:
-                not_detected.add(_id)
-                continue
-            cur_doc = id2docs[_id]
-            highlight_text = ""
-
-            ss = sorted(ss, key=lambda x: x["start"])
-            text = cur_doc.text[: ss[0]["start"]]
-            for idx, span in enumerate(ss):
-                to_highlight = cur_doc.text[span["start"] : span["end"]]
-                if len(to_highlight) > len(highlight_text):
-                    highlight_text = to_highlight
-                text += Render.highlight(to_highlight)
-                if idx < len(ss) - 1:
-                    text += cur_doc.text[span["end"] : ss[idx + 1]["start"]]
-            text += cur_doc.text[ss[-1]["end"] :]
-            # add to display list
-            with_citation.append(
-                Document(
-                    channel="info",
-                    content=Render.collapsible_with_header_score(
-                        cur_doc,
-                        override_text=text,
-                        highlight_text=highlight_text,
-                        open_collapsible=True,
-                    ),
-                )
-            )
-
-        print("Got {} cited docs".format(len(with_citation)))
-
-        sorted_not_detected_items_with_scores = [
-            (id_, id2docs[id_].metadata.get("llm_trulens_score", 0.0))
-            for id_ in not_detected
-        ]
-        sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True)
-
-        for id_, _ in sorted_not_detected_items_with_scores:
-            doc = id2docs[id_]
-            doc_score = doc.metadata.get("llm_trulens_score", 0.0)
-            is_open = not has_llm_score or (
-                doc_score > CONTEXT_RELEVANT_WARNING_SCORE and len(with_citation) == 0
-            )
-            without_citation.append(
-                Document(
-                    channel="info",
-                    content=Render.collapsible_with_header_score(
-                        doc, open_collapsible=is_open
-                    ),
-                )
-            )
-
-        return with_citation, without_citation
-
     def prepare_mindmap(self, answer) -> Document | None:
         mindmap = answer.metadata["mindmap"]
 
         if mindmap:
             mindmap_text = mindmap.text
             uml_renderer = PlantUML()
-            mindmap_svg = uml_renderer.process(mindmap_text)
+
+            try:
+                mindmap_svg = uml_renderer.process(mindmap_text)
+            except Exception as e:
+                print("Failed to process mindmap:", e)
+                mindmap_svg = ""
+
+            # post-process the mindmap SVG
+            mindmap_svg = (
+                mindmap_svg.replace("sans-serif", "Quicksand, sans-serif")
+                .replace("#181818", "#cecece")
+                .replace("background:#FFFFF", "background:none")
+                .replace("stroke-width:1", "stroke-width:2")
+            )
 
             mindmap_content = Document(
                 channel="info",
                 content=Render.collapsible(
                     header="""
                     Mindmap
-
-                    [Expand]
-                    """,
+
+                    [Expand]
+
+                    [Export]""",
                     content=mindmap_svg,
                     open=True,
                 ),
@@ -674,7 +218,9 @@ class FullQAPipeline(BaseReasoning):
 
     def show_citations_and_addons(self, answer, docs, question):
         # show the evidence
-        with_citation, without_citation = self.prepare_citations(answer, docs)
+        with_citation, without_citation = self.answering_pipeline.prepare_citations(
+            answer, docs
+        )
         mindmap_output = self.prepare_mindmap(answer)
         citation_plot_output = self.prepare_citation_viz(answer, question, docs)
 
@@ -773,6 +319,13 @@ class FullQAPipeline(BaseReasoning):
 
         return answer
 
+    @classmethod
+    def prepare_pipeline_instance(cls, settings, retrievers):
+        return cls(
+            retrievers=retrievers,
+            rewrite_pipeline=RewriteQuestionPipeline(),
+        )
+
     @classmethod
     def get_pipeline(cls, settings, states, retrievers):
         """Get the reasoning pipeline
@@ -783,10 +336,7 @@ class FullQAPipeline(BaseReasoning):
         """
         max_context_length_setting = settings.get("reasoning.max_context_length", 32000)
 
-        pipeline = cls(
-            retrievers=retrievers,
-            rewrite_pipeline=RewriteQuestionPipeline(),
-        )
+        pipeline = cls.prepare_pipeline_instance(settings, retrievers)
 
         prefix = f"reasoning.options.{cls.get_info()['id']}"
         llm_name = settings.get(f"{prefix}.llm", None)
@@ -797,13 +347,22 @@ class FullQAPipeline(BaseReasoning):
         evidence_pipeline.max_context_length = max_context_length_setting
 
         # answering pipeline configuration
-        answer_pipeline = pipeline.answering_pipeline
+        use_inline_citation = settings[f"{prefix}.highlight_citation"] == "inline"
+
+        if use_inline_citation:
+            answer_pipeline = pipeline.answering_pipeline = AnswerWithInlineCitation()
+        else:
+            answer_pipeline = pipeline.answering_pipeline = AnswerWithContextPipeline()
+
         answer_pipeline.llm = llm
         answer_pipeline.citation_pipeline.llm = llm
         answer_pipeline.n_last_interactions = settings[f"{prefix}.n_last_interactions"]
-        answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"]
+        answer_pipeline.enable_citation = (
+            settings[f"{prefix}.highlight_citation"] != "off"
+        )
         answer_pipeline.enable_mindmap = settings[f"{prefix}.create_mindmap"]
         answer_pipeline.enable_citation_viz = settings[f"{prefix}.create_citation_viz"]
+        answer_pipeline.use_multimodal = settings[f"{prefix}.use_multimodal"]
         answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"]
         answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"]
         answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get(
@@ -848,9 +407,10 @@ class FullQAPipeline(BaseReasoning):
                 ),
             },
             "highlight_citation": {
-                "name": "Highlight Citation",
-                "value": True,
-                "component": "checkbox",
+                "name": "Citation style",
+                "value": "highlight",
+                "component": "radio",
+                "choices": ["highlight", "inline", "off"],
             },
             "create_mindmap": {
                 "name": "Create Mindmap",
@@ -862,6 +422,11 @@ class FullQAPipeline(BaseReasoning):
                 "value": False,
                 "component": "checkbox",
             },
+            "use_multimodal": {
+                "name": "Use Multimodal Input",
+                "value": False,
+                "component": "checkbox",
+            },
             "system_prompt": {
                 "name": "System Prompt",
                 "value": "This is a question answering system",
@@ -979,7 +544,9 @@ class FullDecomposeQAPipeline(FullQAPipeline):
                 )
 
            # show the evidence
-            with_citation, without_citation = self.prepare_citations(answer, docs)
+            with_citation, without_citation = self.answering_pipeline.prepare_citations(
+                answer, docs
+            )
            if not with_citation and not without_citation:
                yield Document(channel="info", content="No evidence found.")
") else: @@ -999,13 +566,7 @@ class FullDecomposeQAPipeline(FullQAPipeline): return user_settings @classmethod - def get_pipeline(cls, settings, states, retrievers): - """Get the reasoning pipeline - - Args: - settings: the settings for the pipeline - retrievers: the retrievers to use - """ + def prepare_pipeline_instance(cls, settings, retrievers): prefix = f"reasoning.options.{cls.get_info()['id']}" pipeline = cls( retrievers=retrievers, @@ -1013,31 +574,6 @@ class FullDecomposeQAPipeline(FullQAPipeline): prompt_template=settings.get(f"{prefix}.decompose_prompt") ), ) - - llm_name = settings.get(f"{prefix}.llm", None) - llm = llms.get(llm_name, llms.get_default()) - - # answering pipeline configuration - answer_pipeline = pipeline.answering_pipeline - answer_pipeline.llm = llm - answer_pipeline.citation_pipeline.llm = llm - answer_pipeline.n_last_interactions = settings[f"{prefix}.n_last_interactions"] - answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"] - answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"] - answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"] - answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get( - settings["reasoning.lang"], "English" - ) - - pipeline.add_query_context.llm = llm - pipeline.add_query_context.n_last_interactions = settings[ - f"{prefix}.n_last_interactions" - ] - - pipeline.trigger_context = settings[f"{prefix}.trigger_context"] - pipeline.use_rewrite = states.get("app", {}).get("regen", False) - if pipeline.rewrite_pipeline: - pipeline.rewrite_pipeline.llm = llm return pipeline @classmethod diff --git a/libs/ktem/ktem/utils/render.py b/libs/ktem/ktem/utils/render.py index 3e6c434..3614ad2 100644 --- a/libs/ktem/ktem/utils/render.py +++ b/libs/ktem/ktem/utils/render.py @@ -40,7 +40,10 @@ class Render: def collapsible(header, content, open: bool = False) -> str: """Render an HTML friendly collapsible section""" o = " open" if open else "" - return f"{header}{content}
" + return ( + f"
" + f"{header}{content}

" + ) @staticmethod def table(text: str) -> str: @@ -103,9 +106,10 @@ class Render: """ # noqa @staticmethod - def highlight(text: str) -> str: + def highlight(text: str, elem_id: str | None = None) -> str: """Highlight text""" - return f"{text}" + id_text = f" id='mark-{elem_id}'" if elem_id else "" + return f"{text}" @staticmethod def image(url: str, text: str = "") -> str: