From e6fa1af4042ecf239982d8dd0f9fa382aed0cd76 Mon Sep 17 00:00:00 2001 From: "Tuan Anh Nguyen Dang (Tadashi_Cin)" Date: Thu, 17 Oct 2024 14:35:28 +0700 Subject: [PATCH] feat: add mindmap visualization (#405) bump:minor --- libs/ktem/ktem/app.py | 5 + libs/ktem/ktem/assets/js/svg-pan-zoom.min.js | 3 + libs/ktem/ktem/pages/chat/__init__.py | 23 ++++ .../reasoning/prompt_optimization/__init__.py | 2 + .../reasoning/prompt_optimization/mindmap.py | 52 ++++++++ libs/ktem/ktem/reasoning/simple.py | 74 ++++++++++-- libs/ktem/ktem/utils/plantuml.py | 113 ++++++++++++++++++ 7 files changed, 263 insertions(+), 9 deletions(-) create mode 100644 libs/ktem/ktem/assets/js/svg-pan-zoom.min.js create mode 100644 libs/ktem/ktem/reasoning/prompt_optimization/mindmap.py create mode 100644 libs/ktem/ktem/utils/plantuml.py diff --git a/libs/ktem/ktem/app.py b/libs/ktem/ktem/app.py index 4c4c5d6..b56612b 100644 --- a/libs/ktem/ktem/app.py +++ b/libs/ktem/ktem/app.py @@ -55,6 +55,8 @@ class BaseApp: "PDFJS_PREBUILT_DIR", pdf_js_dist_dir, ) + with (dir_assets / "js" / "svg-pan-zoom.min.js").open() as fi: + self._svg_js = fi.read() self._favicon = str(dir_assets / "img" / "favicon.svg") @@ -172,6 +174,9 @@ class BaseApp: "" + "" ) with gr.Blocks( diff --git a/libs/ktem/ktem/assets/js/svg-pan-zoom.min.js b/libs/ktem/ktem/assets/js/svg-pan-zoom.min.js new file mode 100644 index 0000000..0e3558a --- /dev/null +++ b/libs/ktem/ktem/assets/js/svg-pan-zoom.min.js @@ -0,0 +1,3 @@ +// svg-pan-zoom v3.6.2 +// https://github.com/bumbu/svg-pan-zoom +!function s(r,a,l){function u(e,t){if(!a[e]){if(!r[e]){var o="function"==typeof require&&require;if(!t&&o)return o(e,!0);if(h)return h(e,!0);var n=new Error("Cannot find module '"+e+"'");throw n.code="MODULE_NOT_FOUND",n}var i=a[e]={exports:{}};r[e][0].call(i.exports,function(t){return u(r[e][1][t]||t)},i,i.exports,s,r,a,l)}return a[e].exports}for(var h="function"==typeof 
require&&require,t=0;tthis.options.maxZoom*n.zoom&&(t=this.options.maxZoom*n.zoom/this.getZoom());var i=this.viewport.getCTM(),s=e.matrixTransform(i.inverse()),r=this.svg.createSVGMatrix().translate(s.x,s.y).scale(t).translate(-s.x,-s.y),a=i.multiply(r);a.a!==i.a&&this.viewport.setCTM(a)},i.prototype.zoom=function(t,e){this.zoomAtPoint(t,a.getSvgCenterPoint(this.svg,this.width,this.height),e)},i.prototype.publicZoom=function(t,e){e&&(t=this.computeFromRelativeZoom(t)),this.zoom(t,e)},i.prototype.publicZoomAtPoint=function(t,e,o){if(o&&(t=this.computeFromRelativeZoom(t)),"SVGPoint"!==r.getType(e)){if(!("x"in e&&"y"in e))throw new Error("Given point is invalid");e=a.createSVGPoint(this.svg,e.x,e.y)}this.zoomAtPoint(t,e,o)},i.prototype.getZoom=function(){return this.viewport.getZoom()},i.prototype.getRelativeZoom=function(){return this.viewport.getRelativeZoom()},i.prototype.computeFromRelativeZoom=function(t){return t*this.viewport.getOriginalState().zoom},i.prototype.resetZoom=function(){var t=this.viewport.getOriginalState();this.zoom(t.zoom,!0)},i.prototype.resetPan=function(){this.pan(this.viewport.getOriginalState())},i.prototype.reset=function(){this.resetZoom(),this.resetPan()},i.prototype.handleDblClick=function(t){var e;if((this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),this.options.controlIconsEnabled)&&-1<(t.target.getAttribute("class")||"").indexOf("svg-pan-zoom-control"))return!1;e=t.shiftKey?1/(2*(1+this.options.zoomScaleSensitivity)):2*(1+this.options.zoomScaleSensitivity);var 
o=a.getEventPoint(t,this.svg).matrixTransform(this.svg.getScreenCTM().inverse());this.zoomAtPoint(e,o)},i.prototype.handleMouseDown=function(t,e){this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),r.mouseAndTouchNormalize(t,this.svg),this.options.dblClickZoomEnabled&&r.isDblClick(t,e)?this.handleDblClick(t):(this.state="pan",this.firstEventCTM=this.viewport.getCTM(),this.stateOrigin=a.getEventPoint(t,this.svg).matrixTransform(this.firstEventCTM.inverse()))},i.prototype.handleMouseMove=function(t){if(this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),"pan"===this.state&&this.options.panEnabled){var e=a.getEventPoint(t,this.svg).matrixTransform(this.firstEventCTM.inverse()),o=this.firstEventCTM.translate(e.x-this.stateOrigin.x,e.y-this.stateOrigin.y);this.viewport.setCTM(o)}},i.prototype.handleMouseUp=function(t){this.options.preventMouseEventsDefault&&(t.preventDefault?t.preventDefault():t.returnValue=!1),"pan"===this.state&&(this.state="none")},i.prototype.fit=function(){var t=this.viewport.getViewBox(),e=Math.min(this.width/t.width,this.height/t.height);this.zoom(e,!0)},i.prototype.contain=function(){var t=this.viewport.getViewBox(),e=Math.max(this.width/t.width,this.height/t.height);this.zoom(e,!0)},i.prototype.center=function(){var t=this.viewport.getViewBox(),e=.5*(this.width-(t.width+2*t.x)*this.getZoom()),o=.5*(this.height-(t.height+2*t.y)*this.getZoom());this.getPublicInstance().pan({x:e,y:o})},i.prototype.updateBBox=function(){this.viewport.simpleViewBoxCache()},i.prototype.pan=function(t){var e=this.viewport.getCTM();e.e=t.x,e.f=t.y,this.viewport.setCTM(e)},i.prototype.panBy=function(t){var e=this.viewport.getCTM();e.e+=t.x,e.f+=t.y,this.viewport.setCTM(e)},i.prototype.getPan=function(){var t=this.viewport.getState();return{x:t.x,y:t.y}},i.prototype.resize=function(){var t=a.getBoundingClientRectNormalized(this.svg);this.width=t.width,this.height=t.height;var 
e=this.viewport;e.options.width=this.width,e.options.height=this.height,e.processCTM(),this.options.controlIconsEnabled&&(this.getPublicInstance().disableControlIcons(),this.getPublicInstance().enableControlIcons())},i.prototype.destroy=function(){var e=this;for(var t in this.beforeZoom=null,this.onZoom=null,this.beforePan=null,this.onPan=null,(this.onUpdatedCTM=null)!=this.options.customEventsHandler&&this.options.customEventsHandler.destroy({svgElement:this.svg,eventsListenerElement:this.options.eventsListenerElement,instance:this.getPublicInstance()}),this.eventListeners)(this.options.eventsListenerElement||this.svg).removeEventListener(t,this.eventListeners[t],!this.options.preventMouseEventsDefault&&h);this.disableMouseWheelZoom(),this.getPublicInstance().disableControlIcons(),this.reset(),c=c.filter(function(t){return t.svg!==e.svg}),delete this.options,delete this.viewport,delete this.publicInstance,delete this.pi,this.getPublicInstance=function(){return null}},i.prototype.getPublicInstance=function(){var o=this;return this.publicInstance||(this.publicInstance=this.pi={enablePan:function(){return o.options.panEnabled=!0,o.pi},disablePan:function(){return o.options.panEnabled=!1,o.pi},isPanEnabled:function(){return!!o.options.panEnabled},pan:function(t){return o.pan(t),o.pi},panBy:function(t){return o.panBy(t),o.pi},getPan:function(){return o.getPan()},setBeforePan:function(t){return o.options.beforePan=null===t?null:r.proxy(t,o.publicInstance),o.pi},setOnPan:function(t){return o.options.onPan=null===t?null:r.proxy(t,o.publicInstance),o.pi},enableZoom:function(){return o.options.zoomEnabled=!0,o.pi},disableZoom:function(){return o.options.zoomEnabled=!1,o.pi},isZoomEnabled:function(){return!!o.options.zoomEnabled},enableControlIcons:function(){return o.options.controlIconsEnabled||(o.options.controlIconsEnabled=!0,s.enable(o)),o.pi},disableControlIcons:function(){return 
o.options.controlIconsEnabled&&(o.options.controlIconsEnabled=!1,s.disable(o)),o.pi},isControlIconsEnabled:function(){return!!o.options.controlIconsEnabled},enableDblClickZoom:function(){return o.options.dblClickZoomEnabled=!0,o.pi},disableDblClickZoom:function(){return o.options.dblClickZoomEnabled=!1,o.pi},isDblClickZoomEnabled:function(){return!!o.options.dblClickZoomEnabled},enableMouseWheelZoom:function(){return o.enableMouseWheelZoom(),o.pi},disableMouseWheelZoom:function(){return o.disableMouseWheelZoom(),o.pi},isMouseWheelZoomEnabled:function(){return!!o.options.mouseWheelZoomEnabled},setZoomScaleSensitivity:function(t){return o.options.zoomScaleSensitivity=t,o.pi},setMinZoom:function(t){return o.options.minZoom=t,o.pi},setMaxZoom:function(t){return o.options.maxZoom=t,o.pi},setBeforeZoom:function(t){return o.options.beforeZoom=null===t?null:r.proxy(t,o.publicInstance),o.pi},setOnZoom:function(t){return o.options.onZoom=null===t?null:r.proxy(t,o.publicInstance),o.pi},zoom:function(t){return o.publicZoom(t,!0),o.pi},zoomBy:function(t){return o.publicZoom(t,!1),o.pi},zoomAtPoint:function(t,e){return o.publicZoomAtPoint(t,e,!0),o.pi},zoomAtPointBy:function(t,e){return o.publicZoomAtPoint(t,e,!1),o.pi},zoomIn:function(){return this.zoomBy(1+o.options.zoomScaleSensitivity),o.pi},zoomOut:function(){return this.zoomBy(1/(1+o.options.zoomScaleSensitivity)),o.pi},getZoom:function(){return o.getRelativeZoom()},setOnUpdatedCTM:function(t){return o.options.onUpdatedCTM=null===t?null:r.proxy(t,o.publicInstance),o.pi},resetZoom:function(){return o.resetZoom(),o.pi},resetPan:function(){return o.resetPan(),o.pi},reset:function(){return o.reset(),o.pi},fit:function(){return o.fit(),o.pi},contain:function(){return o.contain(),o.pi},center:function(){return o.center(),o.pi},updateBBox:function(){return o.updateBBox(),o.pi},resize:function(){return 
o.resize(),o.pi},getSizes:function(){return{width:o.width,height:o.height,realZoom:o.getZoom(),viewBox:o.viewport.getViewBox()}},destroy:function(){return o.destroy(),o.pi}}),this.publicInstance};var c=[];e.exports=function(t,e){var o=r.getSvg(t);if(null===o)return null;for(var n=c.length-1;0<=n;n--)if(c[n].svg===o)return c[n].instance.getPublicInstance();return c.push({svg:o,instance:new i(o,e)}),c[c.length-1].instance.getPublicInstance()}},{"./control-icons":1,"./shadow-viewport":2,"./svg-utilities":5,"./uniwheel":6,"./utilities":7}],5:[function(t,e,o){var l=t("./utilities"),s="unknown";document.documentMode&&(s="ie"),e.exports={svgNS:"http://www.w3.org/2000/svg",xmlNS:"http://www.w3.org/XML/1998/namespace",xmlnsNS:"http://www.w3.org/2000/xmlns/",xlinkNS:"http://www.w3.org/1999/xlink",evNS:"http://www.w3.org/2001/xml-events",getBoundingClientRectNormalized:function(t){if(t.clientWidth&&t.clientHeight)return{width:t.clientWidth,height:t.clientHeight};if(t.getBoundingClientRect())return t.getBoundingClientRect();throw new Error("Cannot get BoundingClientRect for SVG.")},getOrCreateViewport:function(t,e){var o=null;if(!(o=l.isElement(e)?e:t.querySelector(e))){var n=Array.prototype.slice.call(t.childNodes||t.children).filter(function(t){return"defs"!==t.nodeName&&"#text"!==t.nodeName});1===n.length&&"g"===n[0].nodeName&&null===n[0].getAttribute("transform")&&(o=n[0])}if(!o){var i="viewport-"+(new Date).toISOString().replace(/\D/g,"");(o=document.createElementNS(this.svgNS,"g")).setAttribute("id",i);var s=t.childNodes||t.children;if(s&&0 Document: # type: ignore + prompt_template = PromptTemplate(self.prompt_template) + prompt = prompt_template.populate( + question=question, + context=context, + ) + + messages = [ + SystemMessage(content=self.SYSTEM_PROMPT), + HumanMessage(content=prompt), + ] + + return self.llm(messages) diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py index e6d84ca..9702efd 100644 --- 
a/libs/ktem/ktem/reasoning/simple.py +++ b/libs/ktem/ktem/reasoning/simple.py @@ -10,9 +10,11 @@ import numpy as np import tiktoken from ktem.llms.manager import llms from ktem.reasoning.prompt_optimization import ( + CreateMindmapPipeline, DecomposeQuestionPipeline, RewriteQuestionPipeline, ) +from ktem.utils.plantuml import PlantUML from ktem.utils.render import Render from theflow.settings import settings as flowsettings @@ -227,6 +229,9 @@ class AnswerWithContextPipeline(BaseComponent): citation_pipeline: CitationPipeline = Node( default_callback=lambda _: CitationPipeline(llm=llms.get_default()) ) + create_mindmap_pipeline: CreateMindmapPipeline = Node( + default_callback=lambda _: CreateMindmapPipeline(llm=llms.get_default()) + ) qa_template: str = DEFAULT_QA_TEXT_PROMPT qa_table_template: str = DEFAULT_QA_TABLE_PROMPT @@ -234,6 +239,8 @@ class AnswerWithContextPipeline(BaseComponent): qa_figure_template: str = DEFAULT_QA_FIGURE_PROMPT enable_citation: bool = False + enable_mindmap: bool = False + system_prompt: str = "" lang: str = "English" # support English and Japanese n_last_interactions: int = 5 @@ -325,17 +332,28 @@ class AnswerWithContextPipeline(BaseComponent): # retrieve the citation citation = None + mindmap = None def citation_call(): nonlocal citation citation = self.citation_pipeline(context=evidence, question=question) - if evidence and self.enable_citation: - # execute function call in thread - citation_thread = threading.Thread(target=citation_call) - citation_thread.start() - else: - citation_thread = None + def mindmap_call(): + nonlocal mindmap + mindmap = self.create_mindmap_pipeline(context=evidence, question=question) + + citation_thread = None + mindmap_thread = None + + # execute function call in thread + if evidence: + if self.enable_citation: + citation_thread = threading.Thread(target=citation_call) + citation_thread.start() + + if self.enable_mindmap: + mindmap_thread = threading.Thread(target=mindmap_call) + 
mindmap_thread.start() output = "" logprobs = [] @@ -386,10 +404,12 @@ class AnswerWithContextPipeline(BaseComponent): if citation_thread: citation_thread.join(timeout=CITATION_TIMEOUT) + if mindmap_thread: + mindmap_thread.join(timeout=CITATION_TIMEOUT) answer = Document( text=output, - metadata={"citation": citation, "qa_score": qa_score}, + metadata={"mindmap": mindmap, "citation": citation, "qa_score": qa_score}, ) return answer @@ -597,9 +617,35 @@ class FullQAPipeline(BaseReasoning): ) return with_citation, without_citation - def show_citations(self, answer, docs): + def prepare_mindmap(self, answer) -> Document | None: + mindmap = answer.metadata["mindmap"] + if mindmap: + mindmap_text = mindmap.text + uml_renderer = PlantUML() + mindmap_svg = uml_renderer.process(mindmap_text) + + mindmap_content = Document( + channel="info", + content=Render.collapsible( + header=""" + Mindmap + + [Expand] + """, + content=mindmap_svg, + open=True, + ), + ) + else: + mindmap_content = None + + return mindmap_content + + def show_citations_and_addons(self, answer, docs): # show the evidence with_citation, without_citation = self.prepare_citations(answer, docs) + mindmap_output = self.prepare_mindmap(answer) + if not with_citation and not without_citation: yield Document(channel="info", content="
No evidence found.
") else: @@ -611,6 +657,10 @@ class FullQAPipeline(BaseReasoning): # clear previous info yield Document(channel="info", content=None) + # yield mindmap output + if mindmap_output: + yield mindmap_output + # yield warning message if has_llm_score and max_llm_rerank_score < CONTEXT_RELEVANT_WARNING_SCORE: yield Document( @@ -683,7 +733,7 @@ class FullQAPipeline(BaseReasoning): if scoring_thread: scoring_thread.join() - yield from self.show_citations(answer, docs) + yield from self.show_citations_and_addons(answer, docs) return answer @@ -716,6 +766,7 @@ class FullQAPipeline(BaseReasoning): answer_pipeline.citation_pipeline.llm = llm answer_pipeline.n_last_interactions = settings[f"{prefix}.n_last_interactions"] answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"] + answer_pipeline.enable_mindmap = settings[f"{prefix}.create_mindmap"] answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"] answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"] answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get( @@ -764,6 +815,11 @@ class FullQAPipeline(BaseReasoning): "value": True, "component": "checkbox", }, + "create_mindmap": { + "name": "Create Mindmap", + "value": False, + "component": "checkbox", + }, "system_prompt": { "name": "System Prompt", "value": "This is a question answering system", diff --git a/libs/ktem/ktem/utils/plantuml.py b/libs/ktem/ktem/utils/plantuml.py new file mode 100644 index 0000000..be0cf47 --- /dev/null +++ b/libs/ktem/ktem/utils/plantuml.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import base64 +import string +from zlib import compress + +import httplib2 +import six # type: ignore + +if six.PY2: + from string import maketrans +else: + maketrans = bytes.maketrans + + +plantuml_alphabet = ( + string.digits + string.ascii_uppercase + string.ascii_lowercase + "-_" +) +base64_alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+/" +b64_to_plantuml = 
maketrans( + base64_alphabet.encode("utf-8"), plantuml_alphabet.encode("utf-8") +) + + +class PlantUMLError(Exception): + """ + Error in processing. + """ + + +class PlantUMLConnectionError(PlantUMLError): + """ + Error connecting or talking to PlantUML Server. + """ + + +class PlantUMLHTTPError(PlantUMLConnectionError): + """ + Request to PlantUML server returned HTTP Error. + """ + + def __init__(self, response, content, *args, **kwdargs): + self.response = response + self.content = content + message = "%d: %s" % (self.response.status, self.response.reason) + if not getattr(self, "message", None): + self.message = message + super(PlantUMLHTTPError, self).__init__(message, *args, **kwdargs) + + +def deflate_and_encode(plantuml_text): + """zlib compress the plantuml text and encode it for the plantuml server.""" + zlibbed_str = compress(plantuml_text.encode("utf-8")) + compressed_string = zlibbed_str[2:-4] + return ( + base64.b64encode(compressed_string).translate(b64_to_plantuml).decode("utf-8") + ) + + +class PlantUML(object): + """Connection to a PlantUML server. + + All parameters are optional. + + :param str url: URL to the PlantUML server image CGI. defaults to + http://www.plantuml.com/plantuml/svg/ + :param dict request_opts: Extra options to be passed off to the + httplib2.Http().request() call (the default dict is shared; don't mutate). + """ + + def __init__(self, url="http://www.plantuml.com/plantuml/svg/", request_opts={}): + self.HttpLib2Error = httplib2.HttpLib2Error + self.http = httplib2.Http() + + self.url = url + self.request_opts = request_opts + + def get_url(self, plantuml_text): + """Return the server URL for the image. + You can use this URL in an IMG HTML tag. + + :param str plantuml_text: The plantuml markup to render + :returns: the plantuml server image URL + """ + return self.url + deflate_and_encode(plantuml_text) + + def process(self, plantuml_text): + """Processes the plantuml text into SVG markup.
+ + :param str plantuml_text: The plantuml markup to render + :returns: the post-processed SVG markup as a str + """ + url = self.get_url(plantuml_text) + try: + response, content = self.http.request(url, **self.request_opts) + except self.HttpLib2Error as e: + raise PlantUMLConnectionError(e) + if response.status != 200: + raise PlantUMLHTTPError(response, content) + + svg_content = content.decode("utf-8") + svg_content = svg_content.replace("" + f"{svg_content}" + ) + + return svg_content