feat: sso login, demo mode & new mindmap support (#644) bump:minor

* fix: update .env.example

* feat: add SSO login

* fix: update flowsetting

* fix: add requirement

* fix: refine UI

* fix: update group id-based operation

* fix: improve citation logic

* fix: UI enhancement

* fix: user_id to string in models

* fix: improve chat suggestion UI and flow

* fix: improve group id handling

* fix: improve chat suggestion

* fix: secure download for single file

* fix: file limiting in docstore

* fix: improve chat suggestion logic & language conform

* feat: add markmap and select text to highlight function

* fix: update Dockerfile

* fix: user id auto generate

* fix: default user id

* feat: add demo mode

* fix: update flowsetting

* fix: revise default params for demo

* feat: sso_app alternative

* feat: sso login demo

* feat: demo specific customization

* feat: add login using API key

* fix: disable key-based login

* fix: optimize duplicate upload

* fix: gradio routing

* fix: disable arm build for demo

* fix: revise full-text search js logic

* feat: add rate limit

* fix: update Dockerfile with new launch script

* fix: update Dockerfile

* fix: update Dockerignore

* fix: update ratelimit logic

* fix: user_id in user management page

* fix: rename conv logic

* feat: update demo hint

* fix: minor fix

* fix: highlight on long PDF load

* feat: add HF paper list

* fix: update HF papers load logic

* feat: fly config

* fix: update fly config

* fix: update paper list pull api

* fix: minor update root routing

* fix: minor update root routing

* fix: simplify login flow & paper list UI

* feat: add paper recommendation

* fix: update Dockerfile

* fix: update Dockerfile

* fix: update default model

* feat: add long context Ollama through LCOllama

* feat: expose Gradio share to env

* fix: revert customized changes

* fix: list group at app load

* fix: relocate share conv button

* fix: update launch script

* fix: update Docker CI

* feat: add Ollama model selection at first setup

* docs: update README
This commit is contained in:
Tuan Anh Nguyen Dang (Tadashi_Cin)
2025-02-02 15:19:48 +07:00
committed by GitHub
parent 3006402d7e
commit 3bd3830b8d
52 changed files with 2488 additions and 937 deletions

View File

@@ -13,7 +13,7 @@ from ktem.settings import BaseSettingGroup, SettingGroup, SettingReasoningGroup
from theflow.settings import settings
from theflow.utils.modules import import_dotted_string
BASE_PATH = os.environ.get("GRADIO_ROOT_PATH", "")
BASE_PATH = os.environ.get("GR_FILE_ROOT_PATH", "")
class BaseApp:
@@ -57,7 +57,7 @@ class BaseApp:
self._pdf_view_js = self._pdf_view_js.replace(
"PDFJS_PREBUILT_DIR",
pdf_js_dist_dir,
).replace("GRADIO_ROOT_PATH", BASE_PATH)
).replace("GR_FILE_ROOT_PATH", BASE_PATH)
with (dir_assets / "js" / "svg-pan-zoom.min.js").open() as fi:
self._svg_js = fi.read()
@@ -79,7 +79,7 @@ class BaseApp:
self.default_settings.index.finalize()
self.settings_state = gr.State(self.default_settings.flatten())
self.user_id = gr.State(1 if not self.f_user_management else None)
self.user_id = gr.State("default" if not self.f_user_management else None)
def initialize_indices(self):
"""Create the index manager, start indices, and register to app settings"""
@@ -173,15 +173,25 @@ class BaseApp:
"""Called when the app is created"""
def make(self):
markmap_js = """
<script>
window.markmap = {
/** @type AutoLoaderOptions */
autoLoader: {
toolbar: true, // Enable toolbar
},
};
</script>
"""
external_js = (
"<script type='module' "
"src='https://cdn.skypack.dev/pdfjs-viewer-element'>"
"</script>"
"<script>"
f"{self._svg_js}"
"</script>"
"<script type='module' "
"src='https://cdnjs.cloudflare.com/ajax/libs/tributejs/5.1.3/tribute.min.js'>" # noqa
f"{markmap_js}"
"<script src='https://cdn.jsdelivr.net/npm/markmap-autoloader@0.16'></script>" # noqa
"<script src='https://cdn.jsdelivr.net/npm/minisearch@7.1.1/dist/umd/index.min.js'></script>" # noqa
"</script>"
"<link rel='stylesheet' href='https://cdnjs.cloudflare.com/ajax/libs/tributejs/5.1.3/tribute.css'/>" # noqa
)

View File

@@ -326,7 +326,12 @@ pdfjs-viewer-element {
/* Switch checkbox styles */
#is-public-checkbox {
/* #is-public-checkbox {
position: relative;
top: 4px;
} */
#suggest-chat-checkbox {
position: relative;
top: 4px;
}
@@ -411,3 +416,43 @@ details.evidence {
tbody:not(.row_odd) {
background: var(--table-even-background-fill);
}
#chat-suggestion {
max-height: 350px;
}
#chat-suggestion table {
overflow: hidden;
}
#chat-suggestion table thead {
display: none;
}
#paper-suggestion table {
overflow: hidden;
}
svg.markmap {
width: 100%;
height: 100%;
font-family: Quicksand, sans-serif;
font-size: 15px;
}
div.markmap {
height: 400px;
}
#google-login {
max-width: 450px;
}
#user-api-key-wrapper {
max-width: 450px;
}
#login-row {
display: grid;
place-items: center;
}

View File

@@ -11,10 +11,25 @@ function run() {
version_node.style = "position: fixed; top: 10px; right: 10px;";
main_parent.appendChild(version_node);
// add favicon
const favicon = document.createElement("link");
// set favicon attributes
favicon.rel = "icon";
favicon.type = "image/svg+xml";
favicon.href = "/favicon.ico";
document.head.appendChild(favicon);
// setup conversation dropdown placeholder
let conv_dropdown = document.querySelector("#conversation-dropdown input");
conv_dropdown.placeholder = "Browse conversation";
// move info-expand-button
let info_expand_button = document.getElementById("info-expand-button");
let chat_info_panel = document.getElementById("info-expand");
chat_info_panel.insertBefore(info_expand_button, chat_info_panel.childNodes[2]);
chat_info_panel.insertBefore(
info_expand_button,
chat_info_panel.childNodes[2]
);
// move toggle-side-bar button
let chat_expand_button = document.getElementById("chat-expand-button");
@@ -24,22 +39,24 @@ function run() {
// move setting close button
let setting_tab_nav_bar = document.querySelector("#settings-tab .tab-nav");
let setting_close_button = document.getElementById("save-setting-btn");
setting_tab_nav_bar.appendChild(setting_close_button);
if (setting_close_button) {
setting_tab_nav_bar.appendChild(setting_close_button);
}
let default_conv_column_min_width = "min(300px, 100%)";
conv_column.style.minWidth = default_conv_column_min_width
conv_column.style.minWidth = default_conv_column_min_width;
globalThis.toggleChatColumn = (() => {
globalThis.toggleChatColumn = () => {
/* get flex-grow value of chat_column */
let flex_grow = conv_column.style.flexGrow;
if (flex_grow == '0') {
conv_column.style.flexGrow = '1';
if (flex_grow == "0") {
conv_column.style.flexGrow = "1";
conv_column.style.minWidth = default_conv_column_min_width;
} else {
conv_column.style.flexGrow = '0';
conv_column.style.flexGrow = "0";
conv_column.style.minWidth = "0px";
}
});
};
chat_column.insertBefore(chat_expand_button, chat_column.firstChild);
@@ -47,22 +64,34 @@ function run() {
let mindmap_checkbox = document.getElementById("use-mindmap-checkbox");
let citation_dropdown = document.getElementById("citation-dropdown");
let chat_setting_panel = document.getElementById("chat-settings-expand");
chat_setting_panel.insertBefore(mindmap_checkbox, chat_setting_panel.childNodes[2]);
chat_setting_panel.insertBefore(
mindmap_checkbox,
chat_setting_panel.childNodes[2]
);
chat_setting_panel.insertBefore(citation_dropdown, mindmap_checkbox);
// move share conv checkbox
let report_div = document.querySelector(
"#report-accordion > div:nth-child(3) > div:nth-child(1)"
);
let share_conv_checkbox = document.getElementById("is-public-checkbox");
if (share_conv_checkbox) {
report_div.insertBefore(share_conv_checkbox, report_div.querySelector("button"));
}
// create slider toggle
const is_public_checkbox = document.getElementById("is-public-checkbox");
const is_public_checkbox = document.getElementById("suggest-chat-checkbox");
const label_element = is_public_checkbox.getElementsByTagName("label")[0];
const checkbox_span = is_public_checkbox.getElementsByTagName("span")[0];
new_div = document.createElement("div");
label_element.classList.add("switch");
is_public_checkbox.appendChild(checkbox_span);
label_element.appendChild(new_div)
label_element.appendChild(new_div);
// clpse
globalThis.clpseFn = (id) => {
var obj = document.getElementById('clpse-btn-' + id);
var obj = document.getElementById("clpse-btn-" + id);
obj.classList.toggle("clpse-active");
var content = obj.nextElementSibling;
if (content.style.display === "none") {
@@ -70,48 +99,188 @@ function run() {
} else {
content.style.display = "none";
}
}
};
// store info in local storage
globalThis.setStorage = (key, value) => {
localStorage.setItem(key, value)
}
localStorage.setItem(key, value);
};
globalThis.getStorage = (key, value) => {
item = localStorage.getItem(key);
return item ? item : value;
}
};
globalThis.removeFromStorage = (key) => {
localStorage.removeItem(key)
}
localStorage.removeItem(key);
};
// Function to scroll to given citation with ID
// Sleep function using Promise and setTimeout
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
return new Promise((resolve) => setTimeout(resolve, ms));
}
globalThis.scrollToCitation = async (event) => {
event.preventDefault(); // Prevent the default link behavior
var citationId = event.target.getAttribute('id');
event.preventDefault(); // Prevent the default link behavior
var citationId = event.target.getAttribute("id");
await sleep(100); // Sleep for 100 milliseconds
await sleep(100); // Sleep for 100 milliseconds
// check if modal is open
var modal = document.getElementById("pdf-modal");
var citation = document.querySelector('mark[id="' + citationId + '"]');
// check if modal is open
var modal = document.getElementById("pdf-modal");
var citation = document.querySelector('mark[id="' + citationId + '"]');
if (modal.style.display == "block") {
// trigger on click event of PDF Preview link
var detail_elem = citation;
// traverse up the DOM tree to find the parent element with tag detail
while (detail_elem.tagName.toLowerCase() != "details") {
detail_elem = detail_elem.parentElement;
}
detail_elem.getElementsByClassName("pdf-link").item(0).click();
} else {
if (citation) {
citation.scrollIntoView({ behavior: 'smooth' });
if (modal.style.display == "block") {
// trigger on click event of PDF Preview link
var detail_elem = citation;
// traverse up the DOM tree to find the parent element with tag detail
while (detail_elem.tagName.toLowerCase() != "details") {
detail_elem = detail_elem.parentElement;
}
detail_elem.getElementsByClassName("pdf-link").item(0).click();
} else {
if (citation) {
citation.scrollIntoView({ behavior: "smooth" });
}
}
};
globalThis.fullTextSearch = () => {
// Assign text selection event to last bot message
var bot_messages = document.querySelectorAll(
"div#main-chat-bot div.message-row.bot-row"
);
var last_bot_message = bot_messages[bot_messages.length - 1];
// check if the last bot message has class "text_selection"
if (last_bot_message.classList.contains("text_selection")) {
return;
}
// assign new class to last message
last_bot_message.classList.add("text_selection");
// Get sentences from evidence div
var evidences = document.querySelectorAll(
"#html-info-panel > div:last-child > div > details.evidence div.evidence-content"
);
console.log("Indexing evidences", evidences);
const segmenterEn = new Intl.Segmenter("en", { granularity: "sentence" });
// Split sentences and save to all_segments list
var all_segments = [];
for (var evidence of evidences) {
// check if <details> tag is open
if (!evidence.parentElement.open) {
continue;
}
var markmap_div = evidence.querySelector("div.markmap");
if (markmap_div) {
continue;
}
var evidence_content = evidence.textContent.replace(/[\r\n]+/g, " ");
sentence_it = segmenterEn.segment(evidence_content)[Symbol.iterator]();
while ((sentence = sentence_it.next().value)) {
segment = sentence.segment.trim();
if (segment) {
all_segments.push({
id: all_segments.length,
text: segment,
});
}
}
}
}
let miniSearch = new MiniSearch({
fields: ["text"], // fields to index for full-text search
storeFields: ["text"],
});
// Index all documents
miniSearch.addAll(all_segments);
last_bot_message.addEventListener("mouseup", () => {
let selection = window.getSelection().toString();
let results = miniSearch.search(selection);
if (results.length == 0) {
return;
}
let matched_text = results[0].text;
console.log("query\n", selection, "\nmatched text\n", matched_text);
var evidences = document.querySelectorAll(
"#html-info-panel > div:last-child > div > details.evidence div.evidence-content"
);
// check if modal is open
var modal = document.getElementById("pdf-modal");
// convert all <mark> in evidences to normal text
evidences.forEach((evidence) => {
evidence.querySelectorAll("mark").forEach((mark) => {
mark.outerHTML = mark.innerText;
});
});
// highlight matched_text in evidences
for (var evidence of evidences) {
var evidence_content = evidence.textContent.replace(/[\r\n]+/g, " ");
if (evidence_content.includes(matched_text)) {
// select all p and li elements
paragraphs = evidence.querySelectorAll("p, li");
for (var p of paragraphs) {
var p_content = p.textContent.replace(/[\r\n]+/g, " ");
if (p_content.includes(matched_text)) {
p.innerHTML = p_content.replace(
matched_text,
"<mark>" + matched_text + "</mark>"
);
console.log("highlighted", matched_text, "in", p);
if (modal.style.display == "block") {
// trigger on click event of PDF Preview link
var detail_elem = p;
// traverse up the DOM tree to find the parent element with tag detail
while (detail_elem.tagName.toLowerCase() != "details") {
detail_elem = detail_elem.parentElement;
}
detail_elem.getElementsByClassName("pdf-link").item(0).click();
} else {
p.scrollIntoView({ behavior: "smooth", block: "center" });
}
break;
}
}
}
}
});
};
// Open a child window/tab and write `content` into it as a new document.
//
// content: any value with a non-empty toString() result; it is written
//          verbatim with document.write.
// options: optional overrides for the defaults below:
//          - window:     feature string passed as window.open's 3rd argument
//          - closeChild: when true, close the document stream after writing
//          - childId:    target name passed as window.open's 2nd argument
//
// Returns the opened window, or undefined when `content` is empty/invalid
// or the browser refused to open the window.
globalThis.spawnDocument = (content, options) => {
  let opt = {
    window: "",
    closeChild: true,
    childId: "_blank",
  };
  Object.assign(opt, options);
  // minimal error checking
  if (
    content &&
    typeof content.toString == "function" &&
    content.toString().length
  ) {
    let child = window.open("", opt.childId, opt.window);
    // window.open returns null when the popup is blocked; bail out instead
    // of throwing on `child.document`.
    if (!child) {
      return;
    }
    child.document.write(content.toString());
    if (opt.closeChild) child.document.close();
    return child;
  }
};
// Click handler: copy the clicked element's text into the chat textarea,
// prefixed with "Explain ", then notify listeners and focus the textarea.
globalThis.fillChatInput = (event) => {
  let chatInput = document.querySelector("#chat-input textarea");
  // fill the chat input with the clicked div text
  chatInput.value = "Explain " + event.target.textContent;
  // Setting .value programmatically fires no event, so dispatch a bubbling
  // "input" event by hand.
  chatInput.dispatchEvent(new Event("input", { bubbles: true }));
  chatInput.focus();
};
}

View File

@@ -1,138 +1,186 @@
function onBlockLoad () {
var infor_panel_scroll_pos = 0;
globalThis.createModal = () => {
// Create modal for the 1st time if it does not exist
var modal = document.getElementById("pdf-modal");
var old_position = null;
var old_width = null;
var old_left = null;
var expanded = false;
function onBlockLoad() {
var infor_panel_scroll_pos = 0;
globalThis.createModal = () => {
// Create modal for the 1st time if it does not exist
var modal = document.getElementById("pdf-modal");
var old_position = null;
var old_width = null;
var old_left = null;
var expanded = false;
modal.id = "pdf-modal";
modal.className = "modal";
modal.innerHTML = `
modal.id = "pdf-modal";
modal.className = "modal";
modal.innerHTML = `
<div class="modal-content">
<div class="modal-header">
<span class="close" id="modal-close">&times;</span>
<span class="close" id="modal-expand">&#x26F6;</span>
</div>
<div class="modal-body">
<pdfjs-viewer-element id="pdf-viewer" viewer-path="GRADIO_ROOT_PATH/file=PDFJS_PREBUILT_DIR" locale="en" phrase="true">
<pdfjs-viewer-element id="pdf-viewer" viewer-path="GR_FILE_ROOT_PATH/file=PDFJS_PREBUILT_DIR" locale="en" phrase="true">
</pdfjs-viewer-element>
</div>
</div>
`;
modal.querySelector("#modal-close").onclick = function() {
modal.style.display = "none";
var info_panel = document.getElementById("html-info-panel");
if (info_panel) {
info_panel.style.display = "block";
}
var scrollableDiv = document.getElementById("chat-info-panel");
scrollableDiv.scrollTop = infor_panel_scroll_pos;
};
modal.querySelector("#modal-expand").onclick = function () {
expanded = !expanded;
if (expanded) {
old_position = modal.style.position;
old_left = modal.style.left;
old_width = modal.style.width;
modal.style.position = "fixed";
modal.style.width = "70%";
modal.style.left = "15%";
modal.style.height = "100dvh";
} else {
modal.style.position = old_position;
modal.style.width = old_width;
modal.style.left = old_left;
modal.style.height = "85dvh";
}
};
}
globalThis.compareText = (search_phrase, page_label) => {
var iframe = document.querySelector("#pdf-viewer").iframe;
var innerDoc = (iframe.contentDocument) ? iframe.contentDocument : iframe.contentWindow.document;
var query_selector = (
"#viewer > div[data-page-number='" +
page_label +
"'] > div.textLayer > span"
);
var page_spans = innerDoc.querySelectorAll(query_selector);
for (var i = 0; i < page_spans.length; i++) {
var span = page_spans[i];
if (
span.textContent.length > 4 &&
(
search_phrase.includes(span.textContent) ||
span.textContent.includes(search_phrase)
)
) {
span.innerHTML = "<span class='highlight selected'>" + span.textContent + "</span>";
} else {
// if span is already highlighted, remove it
if (span.querySelector(".highlight")) {
span.innerHTML = span.textContent;
}
}
}
}
// Sleep function using Promise and setTimeout
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
// Function to open modal and display PDF
globalThis.openModal = async (event) => {
event.preventDefault();
var target = event.currentTarget;
var src = target.getAttribute("data-src");
var page = target.getAttribute("data-page");
var search = target.getAttribute("data-search");
var phrase = target.getAttribute("data-phrase");
var pdfViewer = document.getElementById("pdf-viewer");
current_src = pdfViewer.getAttribute("src");
if (current_src != src) {
pdfViewer.setAttribute("src", src);
}
// pdfViewer.setAttribute("phrase", phrase);
// pdfViewer.setAttribute("search", search);
pdfViewer.setAttribute("page", page);
var scrollableDiv = document.getElementById("chat-info-panel");
infor_panel_scroll_pos = scrollableDiv.scrollTop;
var modal = document.getElementById("pdf-modal")
modal.style.display = "block";
modal.querySelector("#modal-close").onclick = function () {
modal.style.display = "none";
var info_panel = document.getElementById("html-info-panel");
if (info_panel) {
info_panel.style.display = "none";
info_panel.style.display = "block";
}
scrollableDiv.scrollTop = 0;
var scrollableDiv = document.getElementById("chat-info-panel");
scrollableDiv.scrollTop = infor_panel_scroll_pos;
};
/* search for text inside PDF page */
await sleep(500);
compareText(search, page);
modal.querySelector("#modal-expand").onclick = function () {
expanded = !expanded;
if (expanded) {
old_position = modal.style.position;
old_left = modal.style.left;
old_width = modal.style.width;
modal.style.position = "fixed";
modal.style.width = "70%";
modal.style.left = "15%";
modal.style.height = "100dvh";
} else {
modal.style.position = old_position;
modal.style.width = old_width;
modal.style.left = old_left;
modal.style.height = "85dvh";
}
};
};
function matchRatio(str1, str2) {
let n = str1.length;
let m = str2.length;
let lcs = [];
for (let i = 0; i <= n; i++) {
lcs[i] = [];
for (let j = 0; j <= m; j++) {
lcs[i][j] = 0;
}
}
globalThis.assignPdfOnclickEvent = () => {
// Get all links and attach click event
var links = document.getElementsByClassName("pdf-link");
for (var i = 0; i < links.length; i++) {
links[i].onclick = openModal;
let result = "";
let max = 0;
for (let i = 0; i < n; i++) {
for (let j = 0; j < m; j++) {
if (str1[i] === str2[j]) {
lcs[i + 1][j + 1] = lcs[i][j] + 1;
if (lcs[i + 1][j + 1] > max) {
max = lcs[i + 1][j + 1];
result = str1.substring(i - max + 1, i + 1);
}
}
}
}
var created_modal = document.getElementById("pdf-viewer");
if (!created_modal) {
createModal();
return result.length / Math.min(n, m);
}
// Highlight the text-layer spans of one PDF page that match any of the
// given phrases, and clear highlights from spans that no longer match.
//
// search_phrases: array of strings to look for.
// page_label:     value matched against the page div's `data-page-number`.
//
// Works on the document inside the #pdf-viewer element's iframe. If no page
// has been rendered yet, the call re-schedules itself and returns.
globalThis.compareText = (search_phrases, page_label) => {
var iframe = document.querySelector("#pdf-viewer").iframe;
var innerDoc = iframe.contentDocument
? iframe.contentDocument
: iframe.contentWindow.document;
var renderedPages = innerDoc.querySelectorAll("div#viewer div.page");
if (renderedPages.length == 0) {
// if pages are not rendered yet, wait and try again
// NOTE(review): there is no retry limit; this repeats every 2 s for as
// long as no page is rendered.
setTimeout(() => compareText(search_phrases, page_label), 2000);
return;
}
// Select every text-layer span of the requested page.
var query_selector =
"#viewer > div[data-page-number='" +
page_label +
"'] > div.textLayer > span";
var page_spans = innerDoc.querySelectorAll(query_selector);
for (var i = 0; i < page_spans.length; i++) {
var span = page_spans[i];
// A span is highlighted when its text is longer than 4 characters and
// matchRatio (defined elsewhere in this file) against at least one phrase
// exceeds 0.5.
if (
span.textContent.length > 4 &&
search_phrases.some(
(phrase) => matchRatio(phrase, span.textContent) > 0.5
)
) {
// NOTE(review): textContent is concatenated into innerHTML unescaped.
span.innerHTML =
"<span class='highlight selected'>" + span.textContent + "</span>";
} else {
// if span is already highlighted, remove it
if (span.querySelector(".highlight")) {
span.innerHTML = span.textContent;
}
}
}
};
// Sleep function using Promise and setTimeout
function sleep(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
// Function to open modal and display PDF
//
// Click handler for ".pdf-link" elements. Reads the target PDF from the
// link's `data-src` / `data-page` attributes, collects the text of every
// <mark> found under the link's grandparent element as search phrases,
// shows the #pdf-modal in place of #html-info-panel, and finally asks
// compareText to highlight the phrases on the requested page.
globalThis.openModal = async (event) => {
  event.preventDefault();
  var target = event.currentTarget;
  var src = target.getAttribute("data-src");
  var page = target.getAttribute("data-page");

  var highlighted_spans =
    target.parentElement.parentElement.querySelectorAll("mark");

  // Get text from highlighted spans
  var search_phrases = Array.from(highlighted_spans).map(
    (span) => span.textContent
  );
  // Use regex to strip 【id】from search phrases
  search_phrases = search_phrases.map((phrase) =>
    phrase.replace(/【\d+】/g, "")
  );

  var pdfViewer = document.getElementById("pdf-viewer");

  // Declared locally: the previous bare assignment created an implicit
  // global (and would throw a ReferenceError under strict mode).
  var current_src = pdfViewer.getAttribute("src");
  // Only swap the source when it changes, so the same PDF is not reloaded.
  if (current_src != src) {
    pdfViewer.setAttribute("src", src);
  }
  pdfViewer.setAttribute("page", page);

  // Remember the info panel scroll offset; the modal's close handler
  // restores it from infor_panel_scroll_pos.
  var scrollableDiv = document.getElementById("chat-info-panel");
  infor_panel_scroll_pos = scrollableDiv.scrollTop;

  var modal = document.getElementById("pdf-modal");
  modal.style.display = "block";
  var info_panel = document.getElementById("html-info-panel");
  if (info_panel) {
    info_panel.style.display = "none";
  }
  scrollableDiv.scrollTop = 0;

  /* search for text inside PDF page */
  await sleep(500);
  compareText(search_phrases, page);
};
globalThis.assignPdfOnclickEvent = () => {
// Get all links and attach click event
var links = document.getElementsByClassName("pdf-link");
for (var i = 0; i < links.length; i++) {
links[i].onclick = openModal;
}
};
var created_modal = document.getElementById("pdf-viewer");
if (!created_modal) {
createModal();
}
}

View File

@@ -29,7 +29,7 @@ class BaseConversation(SQLModel):
datetime.datetime.now(get_localzone()).strftime("%Y-%m-%d %H:%M:%S")
)
)
user: int = Field(default=0) # For now we only have one user
user: str = Field(default="") # For now we only have one user
is_public: bool = Field(default=False)
@@ -55,7 +55,9 @@ class BaseUser(SQLModel):
__table_args__ = {"extend_existing": True}
id: Optional[int] = Field(default=None, primary_key=True)
id: str = Field(
default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True
)
username: str = Field(unique=True)
username_lower: str = Field(unique=True)
password: str
@@ -76,7 +78,7 @@ class BaseSettings(SQLModel):
id: str = Field(
default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True
)
user: int = Field(default=0)
user: str = Field(default="")
setting: dict = Field(default={}, sa_column=Column(JSON))
@@ -97,4 +99,4 @@ class BaseIssueReport(SQLModel):
issues: dict = Field(default={}, sa_column=Column(JSON))
chat: Optional[dict] = Field(default=None, sa_column=Column(JSON))
settings: Optional[dict] = Field(default=None, sa_column=Column(JSON))
user: Optional[int] = Field(default=None)
user: Optional[str] = Field(default=None)

View File

@@ -17,6 +17,10 @@ from kotaemon.storages import BaseDocumentStore, BaseVectorStore
from .base import BaseFileIndexIndexing, BaseFileIndexRetriever
# Return a newly generated random (version 4) UUID converted to a string.
def generate_uuid():
return str(uuid.uuid4())
class FileIndex(BaseIndex):
"""
File index to store and allow retrieval of files
@@ -76,7 +80,7 @@ class FileIndex(BaseIndex):
"date_created": Column(
DateTime(timezone=True), default=datetime.now(get_localzone())
),
"user": Column(Integer, default=1),
"user": Column(String, default=""),
"note": Column(
MutableDict.as_mutable(JSON), # type: ignore
default={},
@@ -101,7 +105,7 @@ class FileIndex(BaseIndex):
"date_created": Column(
DateTime(timezone=True), default=datetime.now(get_localzone())
),
"user": Column(Integer, default=1),
"user": Column(String, default=""),
"note": Column(
MutableDict.as_mutable(JSON), # type: ignore
default={},
@@ -117,7 +121,7 @@ class FileIndex(BaseIndex):
"source_id": Column(String),
"target_id": Column(String),
"relation_type": Column(String),
"user": Column(Integer, default=1),
"user": Column(String, default=""),
},
)
FileGroup = type(
@@ -125,12 +129,20 @@ class FileIndex(BaseIndex):
(Base,),
{
"__tablename__": f"index__{self.id}__group",
"id": Column(Integer, primary_key=True, autoincrement=True),
"__table_args__": (
UniqueConstraint("name", "user", name="_name_user_uc"),
),
"id": Column(
String,
primary_key=True,
default=lambda: str(uuid.uuid4()),
unique=True,
),
"date_created": Column(
DateTime(timezone=True), default=datetime.now(get_localzone())
),
"name": Column(String, unique=True),
"user": Column(Integer, default=1),
"name": Column(String),
"user": Column(String, default=""),
"data": Column(
MutableDict.as_mutable(JSON), # type: ignore
default={"files": []},

View File

@@ -20,9 +20,14 @@ from sqlalchemy.orm import Session
from theflow.settings import settings as flowsettings
from ...utils.commands import WEB_SEARCH_COMMAND
from ...utils.rate_limit import check_rate_limit
from .utils import download_arxiv_pdf, is_arxiv_url
DOWNLOAD_MESSAGE = "Press again to download"
KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
KH_SSO_ENABLED = getattr(flowsettings, "KH_SSO_ENABLED", False)
DOWNLOAD_MESSAGE = "Start download"
MAX_FILENAME_LENGTH = 20
MAX_FILE_COUNT = 200
chat_input_focus_js = """
function() {
@@ -31,6 +36,15 @@ function() {
}
"""
chat_input_focus_js_with_submit = """
function() {
let chatInput = document.querySelector("#chat-input textarea");
let chatInputSubmit = document.querySelector("#chat-input button.submit-button");
chatInputSubmit.click();
chatInput.focus();
}
"""
update_file_list_js = """
function(file_list) {
var values = [];
@@ -53,6 +67,7 @@ function(file_list) {
allowSpaces: true,
})
input_box = document.querySelector('#chat-input textarea');
tribute.detach(input_box);
tribute.attach(input_box);
}
""".replace(
@@ -128,7 +143,9 @@ class FileIndexPage(BasePage):
# TODO: on_building_ui is not correctly named if it's always called in
# the constructor
self.public_events = [f"onFileIndex{index.id}Changed"]
self.on_building_ui()
if not KH_DEMO_MODE:
self.on_building_ui()
def upload_instruction(self) -> str:
msgs = []
@@ -201,10 +218,10 @@ class FileIndexPage(BasePage):
with gr.Accordion("Advance options", open=False):
with gr.Row():
self.download_all_button = gr.DownloadButton(
"Download all files",
visible=True,
)
if not KH_SSO_ENABLED:
self.download_all_button = gr.DownloadButton(
"Download all files",
)
self.delete_all_button = gr.Button(
"Delete all files",
variant="stop",
@@ -249,13 +266,13 @@ class FileIndexPage(BasePage):
)
with gr.Column(visible=False) as self._group_info_panel:
self.selected_group_id = gr.State(value=None)
self.group_label = gr.Markdown()
self.group_name = gr.Textbox(
label="Group name",
placeholder="Group name",
lines=1,
max_lines=1,
interactive=False,
)
self.group_files = gr.Dropdown(
label="Attached files",
@@ -290,7 +307,7 @@ class FileIndexPage(BasePage):
)
gr.Markdown("(separated by new line)")
with gr.Accordion("Advanced indexing options", open=True):
with gr.Accordion("Advanced indexing options", open=False):
with gr.Row():
self.reindex = gr.Checkbox(
value=False, label="Force reindex file", container=False
@@ -324,6 +341,9 @@ class FileIndexPage(BasePage):
def on_subscribe_public_events(self):
"""Subscribe to the declared public event of the app"""
if KH_DEMO_MODE:
return
self._app.subscribe_event(
name=f"onFileIndex{self._index.id}Changed",
definition={
@@ -500,6 +520,34 @@ class FileIndexPage(BasePage):
return not is_zipped_state, new_button
def download_single_file_simple(self, is_zipped_state, file_html, file_id):
    """Export a file's HTML view and toggle the single-file download button.

    Looks up the ``Source`` row whose id equals ``file_id``. When it exists,
    ``file_html`` is written to ``<KH_ZIP_OUTPUT_DIR>/<source name stem>.html``.

    Args:
        is_zipped_state: current toggle state. When truthy the button is
            reset to a plain "Download" button with no file attached;
            otherwise the button is pointed at the exported file.
        file_html: HTML text to write to the export file.
        file_id: id of the ``Source`` row that names the export file.

    Returns:
        A ``(new_state, new_button)`` tuple where ``new_state`` is
        ``not is_zipped_state`` and ``new_button`` is a ``gr.DownloadButton``.

    Raises:
        gr.Error: when a download is requested but no ``Source`` row matches
            ``file_id`` (this previously surfaced as an unbound-variable
            error).
    """
    output_file_path = None

    with Session(engine) as session:
        source = session.execute(
            select(self._index._resources["Source"]).where(
                self._index._resources["Source"].id == file_id
            )
        ).first()

        if source:
            # Only the stem of the stored name is used, so any directory
            # component in the name is dropped.
            target_file_name = Path(source[0].name)

            # create a temporary file with a path to export
            output_file_path = os.path.join(
                flowsettings.KH_ZIP_OUTPUT_DIR, target_file_name.stem + ".html"
            )
            # Explicit UTF-8: the exported HTML may contain non-ASCII text,
            # which the platform default encoding cannot always represent.
            with open(output_file_path, "w", encoding="utf-8") as f:
                f.write(file_html)

    if is_zipped_state:
        new_button = gr.DownloadButton(label="Download", value=None)
    else:
        if output_file_path is None:
            raise gr.Error("The selected file could not be found.")
        # export the file path
        new_button = gr.DownloadButton(
            label=DOWNLOAD_MESSAGE,
            value=output_file_path,
        )

    return not is_zipped_state, new_button
def download_all_files(self):
if self._index.config.get("private", False):
raise gr.Error("This feature is not available for private collection.")
@@ -543,8 +591,145 @@ class FileIndexPage(BasePage):
gr.update(visible=True),
]
def on_register_quick_uploads(self):
# Wire the chat page's quick-upload widgets (file upload and URL box) to
# this index's indexing functions. Any exception raised while wiring is
# printed and swallowed by the handler at the end of this method.
try:
# quick file upload event registration of first Index only
if self._index.id == 1:
self.quick_upload_state = gr.State(value=[])
print("Setting up quick upload event")
# override indexing function from chat page
self._app.chat_page.first_indexing_url_fn = (
self.index_fn_url_with_default_loaders
)
# The file-upload chain is only registered outside demo mode.
if not KH_DEMO_MODE:
# Chain: show a "please wait" status -> index the uploaded files ->
# on success clear the upload widget and set the first indices input
# to "select".
quickUploadedEvent = (
self._app.chat_page.quick_file_upload.upload(
fn=lambda: gr.update(
value="Please wait for the indexing process "
"to complete before adding your question."
),
outputs=self._app.chat_page.quick_file_upload_status,
)
.then(
fn=self.index_fn_file_with_default_loaders,
inputs=[
self._app.chat_page.quick_file_upload,
gr.State(value=False),
self._app.settings_state,
self._app.user_id,
],
outputs=self.quick_upload_state,
concurrency_limit=10,
)
.success(
fn=lambda: [
gr.update(value=None),
gr.update(value="select"),
],
outputs=[
self._app.chat_page.quick_file_upload,
self._app.chat_page._indices_input[0],
],
)
)
# Append every handler registered for this index's "Changed" event.
for event in self._app.get_event(
f"onFileIndex{self._index.id}Changed"
):
quickUploadedEvent = quickUploadedEvent.then(**event)
# Then: copy the indexing result into the second indices input, report
# completion, refresh the file list, and run the JS snippet that
# submits and focuses the chat input.
quickUploadedEvent = (
quickUploadedEvent.success(
fn=lambda x: x,
inputs=self.quick_upload_state,
outputs=self._app.chat_page._indices_input[1],
)
.then(
fn=lambda: gr.update(value="Indexing completed."),
outputs=self._app.chat_page.quick_file_upload_status,
)
.then(
fn=self.list_file,
inputs=[self._app.user_id, self.filter],
outputs=[self.file_list_state, self.file_list],
concurrency_limit=20,
)
.then(
fn=lambda: True,
inputs=None,
outputs=None,
js=chat_input_focus_js_with_submit,
)
)
# Same chain for the quick URL box, triggered by submit and using the
# URL indexing function.
quickURLUploadedEvent = (
self._app.chat_page.quick_urls.submit(
fn=lambda: gr.update(
value="Please wait for the indexing process "
"to complete before adding your question."
),
outputs=self._app.chat_page.quick_file_upload_status,
)
.then(
fn=self.index_fn_url_with_default_loaders,
inputs=[
self._app.chat_page.quick_urls,
gr.State(value=False),
self._app.settings_state,
self._app.user_id,
],
outputs=self.quick_upload_state,
concurrency_limit=10,
)
.success(
fn=lambda: [
gr.update(value=None),
gr.update(value="select"),
],
outputs=[
self._app.chat_page.quick_urls,
self._app.chat_page._indices_input[0],
],
)
)
# Append every handler registered for this index's "Changed" event.
for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
quickURLUploadedEvent = quickURLUploadedEvent.then(**event)
quickURLUploadedEvent = quickURLUploadedEvent.success(
fn=lambda x: x,
inputs=self.quick_upload_state,
outputs=self._app.chat_page._indices_input[1],
).then(
fn=lambda: gr.update(value="Indexing completed."),
outputs=self._app.chat_page.quick_file_upload_status,
)
# The file-list refresh step is skipped in demo mode.
if not KH_DEMO_MODE:
quickURLUploadedEvent = quickURLUploadedEvent.then(
fn=self.list_file,
inputs=[self._app.user_id, self.filter],
outputs=[self.file_list_state, self.file_list],
concurrency_limit=20,
)
# Final step: run the JS snippet that submits and focuses the chat input.
quickURLUploadedEvent = quickURLUploadedEvent.then(
fn=lambda: True,
inputs=None,
outputs=None,
js=chat_input_focus_js_with_submit,
)
# NOTE(review): every exception is caught and only printed, so a failed
# registration does not stop the caller.
except Exception as e:
print(e)
def on_register_events(self):
"""Register all events to the app"""
self.on_register_quick_uploads()
if KH_DEMO_MODE:
return
onDeleted = (
self.delete_button.click(
fn=self.delete_event,
@@ -606,12 +791,13 @@ class FileIndexPage(BasePage):
],
)
self.download_all_button.click(
fn=self.download_all_files,
inputs=[],
outputs=self.download_all_button,
show_progress="hidden",
)
if not KH_SSO_ENABLED:
self.download_all_button.click(
fn=self.download_all_files,
inputs=[],
outputs=self.download_all_button,
show_progress="hidden",
)
self.delete_all_button.click(
self.show_delete_all_confirm,
@@ -659,12 +845,20 @@ class FileIndexPage(BasePage):
],
)
self.download_single_button.click(
fn=self.download_single_file,
inputs=[self.is_zipped_state, self.selected_file_id],
outputs=[self.is_zipped_state, self.download_single_button],
show_progress="hidden",
)
if not KH_SSO_ENABLED:
self.download_single_button.click(
fn=self.download_single_file,
inputs=[self.is_zipped_state, self.selected_file_id],
outputs=[self.is_zipped_state, self.download_single_button],
show_progress="hidden",
)
else:
self.download_single_button.click(
fn=self.download_single_file_simple,
inputs=[self.is_zipped_state, self.chunks, self.selected_file_id],
outputs=[self.is_zipped_state, self.download_single_button],
show_progress="hidden",
)
onUploaded = (
self.upload_button.click(
@@ -689,121 +883,6 @@ class FileIndexPage(BasePage):
)
)
try:
# quick file upload event registration of first Index only
if self._index.id == 1:
self.quick_upload_state = gr.State(value=[])
print("Setting up quick upload event")
# override indexing function from chat page
self._app.chat_page.first_indexing_url_fn = (
self.index_fn_url_with_default_loaders
)
quickUploadedEvent = (
self._app.chat_page.quick_file_upload.upload(
fn=lambda: gr.update(
value="Please wait for the indexing process "
"to complete before adding your question."
),
outputs=self._app.chat_page.quick_file_upload_status,
)
.then(
fn=self.index_fn_file_with_default_loaders,
inputs=[
self._app.chat_page.quick_file_upload,
gr.State(value=False),
self._app.settings_state,
self._app.user_id,
],
outputs=self.quick_upload_state,
)
.success(
fn=lambda: [
gr.update(value=None),
gr.update(value="select"),
],
outputs=[
self._app.chat_page.quick_file_upload,
self._app.chat_page._indices_input[0],
],
)
)
for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
quickUploadedEvent = quickUploadedEvent.then(**event)
quickURLUploadedEvent = (
self._app.chat_page.quick_urls.submit(
fn=lambda: gr.update(
value="Please wait for the indexing process "
"to complete before adding your question."
),
outputs=self._app.chat_page.quick_file_upload_status,
)
.then(
fn=self.index_fn_url_with_default_loaders,
inputs=[
self._app.chat_page.quick_urls,
gr.State(value=True),
self._app.settings_state,
self._app.user_id,
],
outputs=self.quick_upload_state,
)
.success(
fn=lambda: [
gr.update(value=None),
gr.update(value="select"),
],
outputs=[
self._app.chat_page.quick_urls,
self._app.chat_page._indices_input[0],
],
)
)
for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
quickURLUploadedEvent = quickURLUploadedEvent.then(**event)
quickUploadedEvent.success(
fn=lambda x: x,
inputs=self.quick_upload_state,
outputs=self._app.chat_page._indices_input[1],
).then(
fn=lambda: gr.update(value="Indexing completed."),
outputs=self._app.chat_page.quick_file_upload_status,
).then(
fn=self.list_file,
inputs=[self._app.user_id, self.filter],
outputs=[self.file_list_state, self.file_list],
concurrency_limit=20,
).then(
fn=lambda: True,
inputs=None,
outputs=None,
js=chat_input_focus_js,
)
quickURLUploadedEvent.success(
fn=lambda x: x,
inputs=self.quick_upload_state,
outputs=self._app.chat_page._indices_input[1],
).then(
fn=lambda: gr.update(value="Indexing completed."),
outputs=self._app.chat_page.quick_file_upload_status,
).then(
fn=self.list_file,
inputs=[self._app.user_id, self.filter],
outputs=[self.file_list_state, self.file_list],
concurrency_limit=20,
).then(
fn=lambda: True,
inputs=None,
outputs=None,
js=chat_input_focus_js,
)
except Exception as e:
print(e)
uploadedEvent = onUploaded.then(
fn=self.list_file,
inputs=[self._app.user_id, self.filter],
@@ -844,7 +923,12 @@ class FileIndexPage(BasePage):
self.group_list.select(
fn=self.interact_group_list,
inputs=[self.group_list_state],
outputs=[self.group_label, self.group_name, self.group_files],
outputs=[
self.group_label,
self.selected_group_id,
self.group_name,
self.group_files,
],
show_progress="hidden",
).then(
fn=lambda: (
@@ -875,8 +959,9 @@ class FileIndexPage(BasePage):
gr.update(visible=False),
gr.update(value="### Add new group"),
gr.update(visible=True),
gr.update(value="", interactive=True),
gr.update(value=""),
gr.update(value=[]),
None,
],
outputs=[
self.group_add_button,
@@ -884,12 +969,13 @@ class FileIndexPage(BasePage):
self._group_info_panel,
self.group_name,
self.group_files,
self.selected_group_id,
],
)
self.group_chat_button.click(
fn=self.set_group_id_selector,
inputs=[self.group_name],
inputs=[self.selected_group_id],
outputs=[
self._index.get_selector_component_ui().selector,
self._index.get_selector_component_ui().mode,
@@ -897,44 +983,53 @@ class FileIndexPage(BasePage):
],
)
onGroupClosedEvent = {
"fn": lambda: [
gr.update(visible=True),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
None,
],
"outputs": [
self.group_add_button,
self._group_info_panel,
self.group_close_button,
self.group_delete_button,
self.group_chat_button,
self.selected_group_id,
],
}
self.group_close_button.click(**onGroupClosedEvent)
onGroupSaved = (
self.group_save_button.click(
fn=self.save_group,
inputs=[self.group_name, self.group_files, self._app.user_id],
inputs=[
self.selected_group_id,
self.group_name,
self.group_files,
self._app.user_id,
],
)
.then(
self.list_group,
inputs=[self._app.user_id, self.file_list_state],
outputs=[self.group_list_state, self.group_list],
)
.then(
fn=lambda: gr.update(visible=False),
outputs=[self._group_info_panel],
.then(**onGroupClosedEvent)
)
onGroupDeleted = (
self.group_delete_button.click(
fn=self.delete_group,
inputs=[self.selected_group_id],
)
)
self.group_close_button.click(
fn=lambda: [
gr.update(visible=True),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
gr.update(visible=False),
],
outputs=[
self.group_add_button,
self._group_info_panel,
self.group_close_button,
self.group_delete_button,
self.group_chat_button,
],
)
onGroupDeleted = self.group_delete_button.click(
fn=self.delete_group,
inputs=[self.group_name],
).then(
self.list_group,
inputs=[self._app.user_id, self.file_list_state],
outputs=[self.group_list_state, self.group_list],
.then(
self.list_group,
inputs=[self._app.user_id, self.file_list_state],
outputs=[self.group_list_state, self.group_list],
)
.then(**onGroupClosedEvent)
)
for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
@@ -943,10 +1038,21 @@ class FileIndexPage(BasePage):
def _on_app_created(self):
    """Register the refresh chain that runs when the app page loads.

    Nothing is registered when ``KH_DEMO_MODE`` is truthy. Otherwise three
    steps are chained on the app's ``load`` event, in this order:

    1. ``list_file``  -> ``file_list_state`` and ``file_list``
    2. ``list_group`` -> ``group_list_state`` and ``group_list``
       (reads ``file_list_state``, so it must follow step 1)
    3. ``list_file_names`` -> ``group_files``
    """
    if KH_DEMO_MODE:
        return

    files_listed = self._app.app.load(
        self.list_file,
        inputs=[self._app.user_id, self.filter],
        outputs=[self.file_list_state, self.file_list],
    )
    groups_listed = files_listed.then(
        self.list_group,
        inputs=[self._app.user_id, self.file_list_state],
        outputs=[self.group_list_state, self.group_list],
    )
    groups_listed.then(
        self.list_file_names,
        inputs=[self.file_list_state],
        outputs=[self.group_files],
    )
def _may_extract_zip(self, files, zip_dir: str):
@@ -1089,19 +1195,67 @@ class FileIndexPage(BasePage):
return exist_ids + returned_ids
def index_fn_url_with_default_loaders(self, urls, reindex: bool, settings, user_id):
returned_ids = []
def index_fn_url_with_default_loaders(
self,
urls,
reindex: bool,
settings,
user_id,
request: gr.Request,
):
if KH_DEMO_MODE:
check_rate_limit("file_upload", request)
returned_ids: list[str] = []
settings = deepcopy(settings)
settings[f"index.options.{self._index.id}.reader_mode"] = "default"
settings[f"index.options.{self._index.id}.quick_index_mode"] = True
if urls:
_iter = self.index_fn([], urls, reindex, settings, user_id)
try:
while next(_iter):
pass
except StopIteration as e:
returned_ids = e.value
if KH_DEMO_MODE:
urls_splitted = urls.split("\n")
if not all(is_arxiv_url(url) for url in urls_splitted):
raise ValueError("All URLs must be valid arXiv URLs")
output_files = [
download_arxiv_pdf(
url,
output_path=os.environ.get("GRADIO_TEMP_DIR", "/tmp"),
)
for url in urls_splitted
]
exist_ids = []
to_process_files = []
for str_file_path in output_files:
file_path = Path(str_file_path)
exist_id = (
self._index.get_indexing_pipeline(settings, user_id)
.route(file_path)
.get_id_if_exists(file_path)
)
if exist_id:
exist_ids.append(exist_id)
else:
to_process_files.append(str_file_path)
returned_ids = []
if to_process_files:
_iter = self.index_fn(to_process_files, [], reindex, settings, user_id)
try:
while next(_iter):
pass
except StopIteration as e:
returned_ids = e.value
returned_ids = exist_ids + returned_ids
else:
if urls:
_iter = self.index_fn([], urls, reindex, settings, user_id)
try:
while next(_iter):
pass
except StopIteration as e:
returned_ids = e.value
return returned_ids
@@ -1254,6 +1408,7 @@ class FileIndexPage(BasePage):
return gr.update(choices=file_names)
def list_group(self, user_id, file_list):
# supply file_list to display the file names in the group
if file_list:
file_id_to_name = {item["id"]: item["name"] for item in file_list}
else:
@@ -1319,27 +1474,42 @@ class FileIndexPage(BasePage):
return results, group_list
def set_group_id_selector(self, selected_group_name):
def set_group_id_selector(self, selected_group_id):
FileGroup = self._index._resources["FileGroup"]
# check if group_name exist
with Session(engine) as session:
current_group = (
session.query(FileGroup).filter_by(name=selected_group_name).first()
session.query(FileGroup).filter_by(id=selected_group_id).first()
)
file_ids = [json.dumps(current_group.data["files"])]
return [file_ids, "select", gr.Tabs(selected="chat-tab")]
def save_group(self, group_name, group_files, user_id):
def save_group(self, group_id, group_name, group_files, user_id):
FileGroup = self._index._resources["FileGroup"]
current_group = None
# check if group_name exist
with Session(engine) as session:
current_group = session.query(FileGroup).filter_by(name=group_name).first()
if group_id:
current_group = session.query(FileGroup).filter_by(id=group_id).first()
# update current group with new info
current_group.name = group_name
current_group.data["files"] = group_files # Update the files
session.commit()
else:
current_group = (
session.query(FileGroup)
.filter_by(
name=group_name,
user=user_id,
)
.first()
)
if current_group:
raise gr.Error(f"Group {group_name} already exists")
if not current_group:
current_group = FileGroup(
name=group_name,
data={"files": group_files}, # type: ignore
@@ -1347,34 +1517,31 @@ class FileIndexPage(BasePage):
)
session.add(current_group)
session.commit()
else:
# update current group with new info
current_group.name = group_name
current_group.data["files"] = group_files # Update the files
session.commit()
group_id = current_group.id
gr.Info(f"Group {group_name} has been saved")
return group_id
def delete_group(self, group_name):
def delete_group(self, group_id):
if not group_id:
raise gr.Error("No group is selected")
FileGroup = self._index._resources["FileGroup"]
group_id = None
with Session(engine) as session:
group = session.execute(
select(FileGroup).where(FileGroup.name == group_name)
select(FileGroup).where(FileGroup.id == group_id)
).first()
if group:
item = group[0]
group_id = item.id
group_name = item.name
session.delete(item)
session.commit()
gr.Info(f"Group {group_name} has been deleted")
else:
raise gr.Error(f"Group {group_name} not found")
raise gr.Error("No group found")
return group_id
return None
def interact_file_list(self, list_files, ev: gr.SelectData):
if ev.value == "-" and ev.index[0] == 0:
@@ -1394,9 +1561,11 @@ class FileIndexPage(BasePage):
raise gr.Error("No group is selected")
selected_item = list_groups[selected_id]
selected_group_id = selected_item["id"]
return (
"### Group Information",
gr.update(value=selected_item["name"], interactive=False),
selected_group_id,
selected_item["name"],
selected_item["files"],
)
@@ -1525,6 +1694,10 @@ class FileSelector(BasePage):
self._index._resources["Source"].user == user_id
)
if KH_DEMO_MODE:
# limit query by MAX_FILE_COUNT
statement = statement.limit(MAX_FILE_COUNT)
results = session.execute(statement).all()
for result in results:
available_ids.append(result[0].id)

View File

@@ -0,0 +1,58 @@
import os
import requests
# accepted URL prefixes for arXiv abstract / PDF pages (plain prefixes, not regexes)
ARXIV_URL_PATTERNS = [
"https://arxiv.org/abs/",
"https://arxiv.org/pdf/",
]
ILLEGAL_NAME_CHARS = ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]

# Translation table built once at import: every illegal character, plus the
# ASCII control characters (NUL, tab, newline, ...), maps to an underscore.
_ILLEGAL_NAME_TABLE = str.maketrans(
    {
        **{chr(code): "_" for code in range(32)},
        **{char: "_" for char in ILLEGAL_NAME_CHARS},
    }
)


def clean_name(name):
    """Return *name* with characters unsuitable for a file name replaced by "_".

    Every character listed in ``ILLEGAL_NAME_CHARS`` is replaced, as is every
    ASCII control character (code points 0-31), so a title containing a
    newline, tab or NUL cannot leak into a file name. All other characters
    are returned unchanged.

    Args:
        name: the candidate file name (without directory), as a ``str``.

    Returns:
        A string of the same length as *name*.
    """
    # A single translate() pass replaces the chain of str.replace() calls.
    return name.translate(_ILLEGAL_NAME_TABLE)
def is_arxiv_url(url):
    """Return True when *url* begins with one of ``ARXIV_URL_PATTERNS``."""
    # str.startswith accepts a tuple of candidate prefixes.
    return url.startswith(tuple(ARXIV_URL_PATTERNS))
# download PDF from Arxiv URL
def download_arxiv_pdf(url, output_path):
    """Download the PDF behind an arXiv URL, naming the file after the paper.

    The paper title is scraped from the ``h1.title`` element of the abstract
    page and, after ``clean_name``, used as the file name. If a file with that
    name already exists in *output_path* it is reused and nothing is
    downloaded.

    Args:
        url: an arXiv abstract (``/abs/``) or PDF (``/pdf/``) URL.
        output_path: directory in which the ``<title>.pdf`` file is stored.

    Returns:
        Path of the PDF file inside *output_path*.

    Raises:
        ValueError: if *url* is not an arXiv URL, if either HTTP request
            returns a non-success status, or if no title can be extracted.
    """
    if not is_arxiv_url(url):
        raise ValueError("Invalid Arxiv URL")

    abs_prefix = "https://arxiv.org/abs/"
    pdf_prefix = "https://arxiv.org/pdf/"
    request_timeout = 30  # seconds; without it a stalled request blocks forever

    # Derive the sibling URL from the path prefix only. A blanket
    # str.replace("pdf", "abs") also rewrites a trailing ".pdf" (or any other
    # occurrence inside the identifier) and yields a broken abstract URL.
    if url.startswith(abs_prefix):
        paper_id = url[len(abs_prefix):]
        abstract_url = url
        pdf_url = pdf_prefix + paper_id
    elif url.startswith(pdf_prefix):
        paper_id = url[len(pdf_prefix):]
        if paper_id.endswith(".pdf"):
            paper_id = paper_id[: -len(".pdf")]
        abstract_url = abs_prefix + paper_id
        pdf_url = url
    else:
        raise ValueError("Invalid Arxiv URL")

    # get paper name from abstract url
    response = requests.get(abstract_url, timeout=request_timeout)
    if not response.ok:
        raise ValueError(
            f"Failed to fetch Arxiv abstract page (HTTP {response.status_code})"
        )

    # parse HTML response and get h1.title
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(response.content, "html.parser")
    title_tag = soup.find("h1", class_="title")
    if title_tag is None:
        # Page layout changed or this is not an abstract page.
        raise ValueError("Failed to get paper name")

    # Strip again after dropping the "Title:" label so the file name does not
    # start with stray whitespace.
    name = clean_name(title_tag.text.strip().replace("Title:", "").strip())
    if not name:
        raise ValueError("Failed to get paper name")

    output_file_path = os.path.join(output_path, name + ".pdf")

    # prevent downloading if file already exists
    if not os.path.exists(output_file_path):
        response = requests.get(pdf_url, timeout=request_timeout)
        if not response.ok:
            # Do not store an HTML error page under a .pdf name.
            raise ValueError(
                f"Failed to download Arxiv PDF (HTTP {response.status_code})"
            )
        with open(output_file_path, "wb") as f:
            f.write(response.content)

    return output_file_path

View File

@@ -60,6 +60,7 @@ class LLMManager:
LCAnthropicChat,
LCCohereChat,
LCGeminiChat,
LCOllamaChat,
LlamaCppChat,
)
@@ -69,6 +70,7 @@ class LLMManager:
LCAnthropicChat,
LCGeminiChat,
LCCohereChat,
LCOllamaChat,
LlamaCppChat,
]

View File

@@ -9,6 +9,7 @@ from ktem.pages.setup import SetupPage
from theflow.settings import settings as flowsettings
KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
KH_SSO_ENABLED = getattr(flowsettings, "KH_SSO_ENABLED", False)
KH_ENABLE_FIRST_SETUP = getattr(flowsettings, "KH_ENABLE_FIRST_SETUP", False)
KH_APP_DATA_EXISTS = getattr(flowsettings, "KH_APP_DATA_EXISTS", True)
@@ -19,7 +20,7 @@ if config("KH_FIRST_SETUP", default=False, cast=bool):
def toggle_first_setup_visibility():
global KH_APP_DATA_EXISTS
is_first_setup = KH_DEMO_MODE or not KH_APP_DATA_EXISTS
is_first_setup = not KH_DEMO_MODE and not KH_APP_DATA_EXISTS
KH_APP_DATA_EXISTS = True
return gr.update(visible=is_first_setup), gr.update(visible=not is_first_setup)
@@ -70,7 +71,7 @@ class App(BaseApp):
"indices-tab",
],
id="indices-tab",
visible=not self.f_user_management,
visible=not self.f_user_management and not KH_DEMO_MODE,
) as self._tabs[f"{index.id}-tab"]:
page = index.get_index_page_ui()
setattr(self, f"_index_{index.id}", page)
@@ -80,7 +81,7 @@ class App(BaseApp):
elem_id="indices-tab",
elem_classes=["fill-main-area-height", "scrollable", "indices-tab"],
id="indices-tab",
visible=not self.f_user_management,
visible=not self.f_user_management and not KH_DEMO_MODE,
) as self._tabs["indices-tab"]:
for index in self.index_manager.indices:
with gr.Tab(
@@ -90,23 +91,25 @@ class App(BaseApp):
page = index.get_index_page_ui()
setattr(self, f"_index_{index.id}", page)
with gr.Tab(
"Resources",
elem_id="resources-tab",
id="resources-tab",
visible=not self.f_user_management,
elem_classes=["fill-main-area-height", "scrollable"],
) as self._tabs["resources-tab"]:
self.resources_page = ResourcesTab(self)
if not KH_DEMO_MODE:
if not KH_SSO_ENABLED:
with gr.Tab(
"Resources",
elem_id="resources-tab",
id="resources-tab",
visible=not self.f_user_management,
elem_classes=["fill-main-area-height", "scrollable"],
) as self._tabs["resources-tab"]:
self.resources_page = ResourcesTab(self)
with gr.Tab(
"Settings",
elem_id="settings-tab",
id="settings-tab",
visible=not self.f_user_management,
elem_classes=["fill-main-area-height", "scrollable"],
) as self._tabs["settings-tab"]:
self.settings_page = SettingsPage(self)
with gr.Tab(
"Settings",
elem_id="settings-tab",
id="settings-tab",
visible=not self.f_user_management,
elem_classes=["fill-main-area-height", "scrollable"],
) as self._tabs["settings-tab"]:
self.settings_page = SettingsPage(self)
with gr.Tab(
"Help",

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,21 @@
import gradio as gr
from ktem.app import BasePage
from theflow.settings import settings as flowsettings
KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)

# Text shown in the empty chatbot; demo mode uses its own onboarding message.
if KH_DEMO_MODE:
    PLACEHOLDER_TEXT = (
        "Welcome to Kotaemon Demo. "
        "Start by browsing preloaded conversations to get onboard.\n"
        "Check out Hint section for more tips."
    )
else:
    PLACEHOLDER_TEXT = (
        "This is the beginning of a new conversation.\n"
        "Start by uploading a file or a web URL. "
        "Visit Files tab for more options (e.g: GraphRAG)."
    )
class ChatPanel(BasePage):
@@ -10,10 +26,7 @@ class ChatPanel(BasePage):
def on_building_ui(self):
self.chatbot = gr.Chatbot(
label=self._app.app_name,
placeholder=(
"This is the beginning of a new conversation.\nIf you are new, "
"visit the Help tab for quick instructions."
),
placeholder=PLACEHOLDER_TEXT,
show_label=False,
elem_id="main-chat-bot",
show_copy_button=True,

View File

@@ -4,29 +4,34 @@ from theflow.settings import settings as flowsettings
class ChatSuggestion(BasePage):
CHAT_SAMPLES = getattr(
flowsettings,
"KH_FEATURE_CHAT_SUGGESTION_SAMPLES",
[
"Summary this document",
"Generate a FAQ for this document",
"Identify the main highlights in bullet points",
],
)
def __init__(self, app):
self._app = app
self.on_building_ui()
def on_building_ui(self):
chat_samples = getattr(
flowsettings,
"KH_FEATURE_CHAT_SUGGESTION_SAMPLES",
[
"Summary this document",
"Generate a FAQ for this document",
"Identify the main highlights in this text",
],
)
self.chat_samples = [[each] for each in chat_samples]
self.chat_samples = [[each] for each in self.CHAT_SAMPLES]
with gr.Accordion(
label="Chat Suggestion",
visible=getattr(flowsettings, "KH_FEATURE_CHAT_SUGGESTION", False),
) as self.accordion:
self.default_example = gr.State(
value=self.chat_samples,
)
self.examples = gr.DataFrame(
value=self.chat_samples,
headers=["Next Question"],
interactive=False,
elem_id="chat-suggestion",
wrap=True,
)

View File

@@ -14,11 +14,22 @@ from .chat_suggestion import ChatSuggestion
from .common import STATE
logger = logging.getLogger(__name__)
KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
KH_SSO_ENABLED = getattr(flowsettings, "KH_SSO_ENABLED", False)
ASSETS_DIR = "assets/icons"
if not os.path.isdir(ASSETS_DIR):
ASSETS_DIR = "libs/ktem/ktem/assets/icons"
logout_js = """
function () {
removeFromStorage('google_api_key');
window.location.href = "/logout";
}
"""
def is_conv_name_valid(name):
"""Check if the conversation name is valid"""
errors = []
@@ -35,11 +46,13 @@ class ConversationControl(BasePage):
def __init__(self, app):
self._app = app
self.logout_js = logout_js
self.on_building_ui()
def on_building_ui(self):
with gr.Row():
gr.Markdown("## Conversations")
title_text = "Conversations" if not KH_DEMO_MODE else "Kotaemon Papers"
gr.Markdown("## {}".format(title_text))
self.btn_toggle_dark_mode = gr.Button(
value="",
icon=f"{ASSETS_DIR}/dark_mode.svg",
@@ -83,42 +96,88 @@ class ConversationControl(BasePage):
filterable=True,
interactive=True,
elem_classes=["unset-overflow"],
elem_id="conversation-dropdown",
)
with gr.Row() as self._new_delete:
self.cb_suggest_chat = gr.Checkbox(
value=False,
label="Suggest chat",
min_width=10,
scale=6,
elem_id="suggest-chat-checkbox",
container=False,
visible=not KH_DEMO_MODE,
)
self.cb_is_public = gr.Checkbox(
value=False,
label="Shared",
min_width=10,
scale=4,
label="Share this conversation",
elem_id="is-public-checkbox",
container=False,
visible=not KH_DEMO_MODE and not KH_SSO_ENABLED,
)
self.btn_conversation_rn = gr.Button(
value="",
icon=f"{ASSETS_DIR}/rename.svg",
min_width=2,
scale=1,
size="sm",
elem_classes=["no-background", "body-text-color"],
)
self.btn_del = gr.Button(
value="",
icon=f"{ASSETS_DIR}/delete.svg",
min_width=2,
scale=1,
size="sm",
elem_classes=["no-background", "body-text-color"],
)
self.btn_new = gr.Button(
value="",
icon=f"{ASSETS_DIR}/new.svg",
min_width=2,
scale=1,
size="sm",
elem_classes=["no-background", "body-text-color"],
elem_id="new-conv-button",
)
if not KH_DEMO_MODE:
self.btn_conversation_rn = gr.Button(
value="",
icon=f"{ASSETS_DIR}/rename.svg",
min_width=2,
scale=1,
size="sm",
elem_classes=["no-background", "body-text-color"],
)
self.btn_del = gr.Button(
value="",
icon=f"{ASSETS_DIR}/delete.svg",
min_width=2,
scale=1,
size="sm",
elem_classes=["no-background", "body-text-color"],
)
self.btn_new = gr.Button(
value="",
icon=f"{ASSETS_DIR}/new.svg",
min_width=2,
scale=1,
size="sm",
elem_classes=["no-background", "body-text-color"],
elem_id="new-conv-button",
)
else:
self.btn_new = gr.Button(
value="New chat",
min_width=120,
size="sm",
scale=1,
variant="primary",
elem_id="new-conv-button",
visible=False,
)
if KH_DEMO_MODE:
with gr.Row():
self.btn_demo_login = gr.Button(
"Sign-in to create new chat",
min_width=120,
size="sm",
scale=1,
variant="primary",
)
_js_redirect = """
() => {
url = '/login' + window.location.search;
window.open(url, '_blank');
}
"""
self.btn_demo_login.click(None, js=_js_redirect)
self.btn_demo_logout = gr.Button(
"Sign-out",
min_width=120,
size="sm",
scale=1,
visible=False,
)
with gr.Row(visible=False) as self._delete_confirm:
self.btn_del_conf = gr.Button(
@@ -139,8 +198,6 @@ class ConversationControl(BasePage):
visible=False,
)
self.chat_suggestion = ChatSuggestion(self._app)
def load_chat_history(self, user_id):
"""Reload chat history"""
@@ -241,6 +298,8 @@ class ConversationControl(BasePage):
def select_conv(self, conversation_id, user_id):
"""Select the conversation"""
default_chat_suggestions = [[each] for each in ChatSuggestion.CHAT_SAMPLES]
with Session(engine) as session:
statement = select(Conversation).where(Conversation.id == conversation_id)
try:
@@ -257,7 +316,9 @@ class ConversationControl(BasePage):
selected = {}
chats = result.data_source.get("messages", [])
chat_suggestions = result.data_source.get("chat_suggestions", [])
chat_suggestions = result.data_source.get(
"chat_suggestions", default_chat_suggestions
)
retrieval_history: list[str] = result.data_source.get(
"retrieval_messages", []
@@ -282,7 +343,7 @@ class ConversationControl(BasePage):
name = ""
selected = {}
chats = []
chat_suggestions = []
chat_suggestions = default_chat_suggestions
retrieval_history = []
plot_history = []
info_panel = ""
@@ -317,25 +378,21 @@ class ConversationControl(BasePage):
def rename_conv(self, conversation_id, new_name, is_renamed, user_id):
"""Rename the conversation"""
if not is_renamed:
if not is_renamed or KH_DEMO_MODE or user_id is None or not conversation_id:
return (
gr.update(),
conversation_id,
gr.update(visible=False),
)
if user_id is None:
gr.Warning("Please sign in first (Settings → User Settings)")
return gr.update(), ""
if not conversation_id:
gr.Warning("No conversation selected.")
return gr.update(), ""
errors = is_conv_name_valid(new_name)
if errors:
gr.Warning(errors)
return gr.update(), conversation_id
return (
gr.update(),
conversation_id,
gr.update(visible=False),
)
with Session(engine) as session:
statement = select(Conversation).where(Conversation.id == conversation_id)
@@ -382,6 +439,29 @@ class ConversationControl(BasePage):
gr.Info("Chat suggestions updated.")
def toggle_demo_login_visibility(self, user_api_key, request: gr.Request):
    """Build four visibility updates from the request's sign-in state.

    The user is looked up with ``gradiologin.get_user(request)``; a missing
    ``gradiologin`` package or an ``AssertionError`` from the lookup counts
    as "not signed in". ``user_api_key`` is accepted but currently unused.

    Returns:
        A list of four ``gr.update`` objects: the first three are visible
        only when a user is signed in, the fourth only when none is.
    """
    try:
        import gradiologin as grlogin

        user = grlogin.get_user(request)
    except (ImportError, AssertionError):
        user = None

    signed_in = bool(user)  # or user_api_key
    return [
        gr.update(visible=signed_in),
        gr.update(visible=signed_in),
        gr.update(visible=signed_in),
        gr.update(visible=not signed_in),
    ]
def _on_app_created(self):
"""Reload the conversation once the app is created"""
self._app.app.load(

View File

@@ -0,0 +1,23 @@
from textwrap import dedent
import gradio as gr
from ktem.app import BasePage
class HintPage(BasePage):
    """Collapsed "Hint" accordion containing a fixed Markdown list of tips."""

    def __init__(self, app):
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Render the accordion and its Markdown body."""
        # The text is kept flush-left so the literal stays exactly as written;
        # dedent() leaves already-unindented text unchanged.
        hint_md = dedent(
            """
- You can select any text from the chat answer to **highlight relevant citation(s)** on the right panel.
- **Citations** can be viewed on both PDF viewer and raw text.
- You can tweak the citation format and use advance (CoT) reasoning in **Chat settings** menu.
- Want to **explore more**? Check out the **Help** section to create your private space.
"""  # noqa
        )
        with gr.Accordion(label="Hint", open=False):
            gr.Markdown(hint_md)

View File

@@ -0,0 +1,41 @@
import gradio as gr
from ktem.app import BasePage
from pandas import DataFrame
from ...utils.hf_papers import fetch_papers
class PaperListPage(BasePage):
    """Accordion with a read-only table of papers returned by ``fetch_papers``."""

    def __init__(self, app):
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Create the state holder and the (initially empty) paper table.

        Returns:
            The ``gr.DataFrame`` component that displays the papers.
        """
        # Holds the raw value returned by fetch_papers; filled in by load().
        self.papers_state = gr.State(None)

        with gr.Accordion(
            label="Browse popular daily papers",
            open=True,
        ) as self.accordion:
            paper_table = gr.DataFrame(
                value=[],
                headers=["title", "url", "upvotes"],
                column_widths=[60, 30, 10],
                interactive=False,
                elem_id="paper-suggestion",
                wrap=True,
            )
            self.examples = paper_table

        return self.examples

    def load(self):
        """Fetch the top five papers.

        Returns:
            A ``(DataFrame, papers)`` tuple: the table value and the raw
            ``fetch_papers`` result it was built from.
        """
        papers = fetch_papers(top_n=5)
        return (DataFrame(papers), papers)

    def _on_app_created(self):
        """Populate the table and the state when the app page loads."""
        self._app.app.load(
            self.load,
            outputs=[self.examples, self.papers_state],
        )

    def select_example(self, state, ev: gr.SelectData):
        """Return the ``"url"`` entry of the row selected in the table.

        ``state`` is indexed by the row number taken from ``ev.index[0]``.
        """
        row = ev.index[0]
        return state[row]["url"]

View File

@@ -12,7 +12,7 @@ class ReportIssue(BasePage):
self.on_building_ui()
def on_building_ui(self):
with gr.Accordion(label="Feedback", open=False):
with gr.Accordion(label="Feedback", open=False, elem_id="report-accordion"):
self.correctness = gr.Radio(
choices=[
("The answer is correct", "correct"),

View File

@@ -3,8 +3,12 @@ from pathlib import Path
import gradio as gr
import requests
from decouple import config
from theflow.settings import settings
KH_DEMO_MODE = getattr(settings, "KH_DEMO_MODE", False)
HF_SPACE_URL = config("HF_SPACE_URL", default="")
def get_remote_doc(url: str) -> str:
try:
@@ -59,6 +63,22 @@ class HelpPage:
about_md = f"Version: {self.app_version}\n\n{about_md}"
gr.Markdown(about_md)
if KH_DEMO_MODE:
with gr.Accordion("Create Your Own Space"):
gr.Markdown(
"This is a demo with limited functionality. "
"Use **Create space** button to install Kotaemon "
"in your own space with all features "
"(including upload and manage your private "
"documents securely)."
)
gr.Button(
value="Create Your Own Space",
link=HF_SPACE_URL,
variant="primary",
size="lg",
)
user_guide_md_dir = self.doc_dir / "usage.md"
if user_guide_md_dir.exists():
with (self.doc_dir / "usage.md").open(encoding="utf-8") as fi:
@@ -68,7 +88,7 @@ class HelpPage:
f"{self.remote_content_url}/v{self.app_version}/docs/usage.md"
)
if user_guide_md:
with gr.Accordion("User Guide"):
with gr.Accordion("User Guide", open=not KH_DEMO_MODE):
gr.Markdown(user_guide_md)
if self.app_version:

View File

@@ -3,6 +3,7 @@ import hashlib
import gradio as gr
from ktem.app import BasePage
from ktem.db.models import User, engine
from ktem.pages.resources.user import create_user
from sqlmodel import Session, select
fetch_creds = """
@@ -85,19 +86,47 @@ class LoginPage(BasePage):
},
)
def login(self, usn, pwd):
if not usn or not pwd:
return None, usn, pwd
def login(self, usn, pwd, request: gr.Request):
try:
import gradiologin as grlogin
user = grlogin.get_user(request)
except (ImportError, AssertionError):
user = None
if user:
user_id = user["sub"]
with Session(engine) as session:
stmt = select(User).where(
User.id == user_id,
)
result = session.exec(stmt).all()
hashed_password = hashlib.sha256(pwd.encode()).hexdigest()
with Session(engine) as session:
stmt = select(User).where(
User.username_lower == usn.lower().strip(),
User.password == hashed_password,
)
result = session.exec(stmt).all()
if result:
return result[0].id, "", ""
print("Existing user:", user)
return user_id, "", ""
else:
print("Creating new user:", user)
create_user(
usn=user["email"],
pwd="",
user_id=user_id,
is_admin=False,
)
return user_id, "", ""
else:
if not usn or not pwd:
return None, usn, pwd
gr.Warning("Invalid username or password")
return None, usn, pwd
hashed_password = hashlib.sha256(pwd.encode()).hexdigest()
with Session(engine) as session:
stmt = select(User).where(
User.username_lower == usn.lower().strip(),
User.password == hashed_password,
)
result = session.exec(stmt).all()
if result:
return result[0].id, "", ""
gr.Warning("Invalid username or password")
return None, usn, pwd

View File

@@ -94,7 +94,7 @@ def validate_password(pwd, pwd_cnf):
return ""
def create_user(usn, pwd) -> bool:
def create_user(usn, pwd, user_id=None, is_admin=True) -> bool:
with Session(engine) as session:
statement = select(User).where(User.username_lower == usn.lower())
result = session.exec(statement).all()
@@ -105,10 +105,11 @@ def create_user(usn, pwd) -> bool:
else:
hashed_password = hashlib.sha256(pwd.encode()).hexdigest()
user = User(
id=user_id,
username=usn,
username_lower=usn.lower(),
password=hashed_password,
admin=True,
admin=is_admin,
)
session.add(user)
session.commit()
@@ -136,11 +137,12 @@ class UserManagement(BasePage):
self.state_user_list = gr.State(value=None)
self.user_list = gr.DataFrame(
headers=["id", "name", "admin"],
column_widths=[0, 50, 50],
interactive=False,
)
with gr.Group(visible=False) as self._selected_panel:
self.selected_user_id = gr.Number(value=-1, visible=False)
self.selected_user_id = gr.State(value=-1)
self.usn_edit = gr.Textbox(label="Username")
with gr.Row():
self.pwd_edit = gr.Textbox(label="Change password", type="password")
@@ -346,7 +348,7 @@ class UserManagement(BasePage):
if not ev.selected:
return -1
return int(user_list["id"][ev.index[0]])
return user_list["id"][ev.index[0]]
def on_selected_user_change(self, selected_user_id):
if selected_user_id == -1:
@@ -367,7 +369,7 @@ class UserManagement(BasePage):
btn_delete_no = gr.update(visible=False)
with Session(engine) as session:
statement = select(User).where(User.id == int(selected_user_id))
statement = select(User).where(User.id == selected_user_id)
user = session.exec(statement).one()
usn_edit = gr.update(value=user.username)
@@ -414,7 +416,7 @@ class UserManagement(BasePage):
return pwd, pwd_cnf
with Session(engine) as session:
statement = select(User).where(User.id == int(selected_user_id))
statement = select(User).where(User.id == selected_user_id)
user = session.exec(statement).one()
user.username = usn
user.username_lower = usn.lower()
@@ -432,7 +434,7 @@ class UserManagement(BasePage):
return selected_user_id
with Session(engine) as session:
statement = select(User).where(User.id == int(selected_user_id))
statement = select(User).where(User.id == selected_user_id)
user = session.exec(statement).one()
session.delete(user)
session.commit()

View File

@@ -5,6 +5,10 @@ from ktem.app import BasePage
from ktem.components import reasonings
from ktem.db.models import Settings, User, engine
from sqlmodel import Session, select
from theflow.settings import settings as flowsettings
KH_SSO_ENABLED = getattr(flowsettings, "KH_SSO_ENABLED", False)
signout_js = """
function(u, c, pw, pwc) {
@@ -80,38 +84,44 @@ class SettingsPage(BasePage):
# render application page if there are application settings
self._render_app_tab = False
if self._default_settings.application.settings:
if not KH_SSO_ENABLED and self._default_settings.application.settings:
self._render_app_tab = True
# render index page if there are index settings (general and/or specific)
self._render_index_tab = False
if self._default_settings.index.settings:
self._render_index_tab = True
else:
for sig in self._default_settings.index.options.values():
if sig.settings:
self._render_index_tab = True
break
if not KH_SSO_ENABLED:
if self._default_settings.index.settings:
self._render_index_tab = True
else:
for sig in self._default_settings.index.options.values():
if sig.settings:
self._render_index_tab = True
break
# render reasoning page if there are reasoning settings
self._render_reasoning_tab = False
if len(self._default_settings.reasoning.settings) > 1:
self._render_reasoning_tab = True
else:
for sig in self._default_settings.reasoning.options.values():
if sig.settings:
self._render_reasoning_tab = True
break
if not KH_SSO_ENABLED:
if len(self._default_settings.reasoning.settings) > 1:
self._render_reasoning_tab = True
else:
for sig in self._default_settings.reasoning.options.values():
if sig.settings:
self._render_reasoning_tab = True
break
self.on_building_ui()
def on_building_ui(self):
self.setting_save_btn = gr.Button(
"Save & Close",
variant="primary",
elem_classes=["right-button"],
elem_id="save-setting-btn",
)
if not KH_SSO_ENABLED:
self.setting_save_btn = gr.Button(
"Save & Close",
variant="primary",
elem_classes=["right-button"],
elem_id="save-setting-btn",
)
if self._app.f_user_management:
with gr.Tab("User settings"):
self.user_tab()
@@ -175,21 +185,22 @@ class SettingsPage(BasePage):
)
def on_register_events(self):
self.setting_save_btn.click(
self.save_setting,
inputs=[self._user_id] + self.components(),
outputs=self._settings_state,
).then(
lambda: gr.Tabs(selected="chat-tab"),
outputs=self._app.tabs,
)
if not KH_SSO_ENABLED:
self.setting_save_btn.click(
self.save_setting,
inputs=[self._user_id] + self.components(),
outputs=self._settings_state,
).then(
lambda: gr.Tabs(selected="chat-tab"),
outputs=self._app.tabs,
)
self._components["reasoning.use"].change(
self.change_reasoning_mode,
inputs=[self._components["reasoning.use"]],
outputs=list(self._reasoning_mode.values()),
show_progress="hidden",
)
if self._app.f_user_management:
if self._app.f_user_management and not KH_SSO_ENABLED:
self.password_change_btn.click(
self.change_password,
inputs=[
@@ -223,15 +234,21 @@ class SettingsPage(BasePage):
def user_tab(self):
# user management
self.current_name = gr.Markdown("Current user: ___")
self.signout = gr.Button("Logout")
self.password_change = gr.Textbox(
label="New password", interactive=True, type="password"
)
self.password_change_confirm = gr.Textbox(
label="Confirm password", interactive=True, type="password"
)
self.password_change_btn = gr.Button("Change password", interactive=True)
if KH_SSO_ENABLED:
import gradiologin as grlogin
self.sso_signout = grlogin.LogoutButton("Logout")
else:
self.signout = gr.Button("Logout")
self.password_change = gr.Textbox(
label="New password", interactive=True, type="password"
)
self.password_change_confirm = gr.Textbox(
label="Confirm password", interactive=True, type="password"
)
self.password_change_btn = gr.Button("Change password", interactive=True)
def change_password(self, user_id, password, password_confirm):
from ktem.pages.resources.user import validate_password

View File

@@ -2,13 +2,13 @@ import json
import gradio as gr
import requests
from decouple import config
from ktem.app import BasePage
from ktem.embeddings.manager import embedding_models_manager as embeddings
from ktem.llms.manager import llms
from ktem.rerankings.manager import reranking_models_manager as rerankers
from theflow.settings import settings as flowsettings
KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
KH_OLLAMA_URL = getattr(flowsettings, "KH_OLLAMA_URL", "http://localhost:11434/v1/")
DEFAULT_OLLAMA_URL = KH_OLLAMA_URL.replace("v1", "api")
if DEFAULT_OLLAMA_URL.endswith("/"):
@@ -113,9 +113,18 @@ class SetupPage(BasePage):
(
"#### Setup Ollama\n\n"
"Download and install Ollama from "
"https://ollama.com/"
"https://ollama.com/. Check out latest models at "
"https://ollama.com/library. "
)
)
self.ollama_model_name = gr.Textbox(
label="LLM model name",
value=config("LOCAL_MODEL", default="qwen2.5:7b"),
)
self.ollama_emb_model_name = gr.Textbox(
label="Embedding model name",
value=config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"),
)
self.setup_log = gr.HTML(
show_label=False,
@@ -139,22 +148,23 @@ class SetupPage(BasePage):
self.cohere_api_key,
self.openai_api_key,
self.google_api_key,
self.ollama_model_name,
self.ollama_emb_model_name,
self.radio_model,
],
outputs=[self.setup_log],
show_progress="hidden",
)
if not KH_DEMO_MODE:
onSkipSetup = gr.on(
triggers=[self.btn_skip.click],
fn=lambda: None,
inputs=[],
show_progress="hidden",
outputs=[self.radio_model],
)
onSkipSetup = gr.on(
triggers=[self.btn_skip.click],
fn=lambda: None,
inputs=[],
show_progress="hidden",
outputs=[self.radio_model],
)
for event in self._app.get_event("onFirstSetupComplete"):
onSkipSetup = onSkipSetup.success(**event)
for event in self._app.get_event("onFirstSetupComplete"):
onSkipSetup = onSkipSetup.success(**event)
onFirstSetupComplete = onFirstSetupComplete.success(
fn=self.update_default_settings,
@@ -181,12 +191,10 @@ class SetupPage(BasePage):
cohere_api_key,
openai_api_key,
google_api_key,
ollama_model_name,
ollama_emb_model_name,
radio_model_value,
):
# skip if KH_DEMO_MODE
if KH_DEMO_MODE:
raise gr.Error(DEMO_MESSAGE)
log_content = ""
if not radio_model_value:
gr.Info("Skip setup models.")
@@ -274,7 +282,7 @@ class SetupPage(BasePage):
spec={
"__type__": "kotaemon.llms.ChatOpenAI",
"base_url": KH_OLLAMA_URL,
"model": "llama3.1:8b",
"model": ollama_model_name,
"api_key": "ollama",
},
default=True,
@@ -284,7 +292,7 @@ class SetupPage(BasePage):
spec={
"__type__": "kotaemon.embeddings.OpenAIEmbeddings",
"base_url": KH_OLLAMA_URL,
"model": "nomic-embed-text",
"model": ollama_emb_model_name,
"api_key": "ollama",
},
default=True,

View File

@@ -1,4 +1,5 @@
import logging
from textwrap import dedent
from ktem.llms.manager import llms
@@ -8,6 +9,31 @@ from kotaemon.llms import ChatLLM, PromptTemplate
logger = logging.getLogger(__name__)
# Standalone HTML page for a mindmap: it pulls markmap-autoloader from the
# jsDelivr CDN and carries a `{markmap_div}` placeholder inside <body>.
# NOTE(review): the <style> section contains literal `{`/`}` characters, so
# filling the placeholder with str.format() would raise; presumably callers
# substitute it with str.replace("{markmap_div}", ...) -- confirm at call site.
MINDMAP_HTML_EXPORT_TEMPLATE = dedent(
"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Mindmap</title>
<style>
svg.markmap {
width: 100%;
height: 100vh;
}
</style>
<script src="https://cdn.jsdelivr.net/npm/markmap-autoloader@0.16"></script>
</head>
<body>
{markmap_div}
</body>
</html>
"""
)
class CreateMindmapPipeline(BaseComponent):
"""Create a mindmap from the question and context"""
@@ -37,6 +63,20 @@ Use the template like this:
""" # noqa: E501
prompt_template: str = MINDMAP_PROMPT_TEMPLATE
@classmethod
def convert_uml_to_markdown(cls, text: str) -> str:
    """Convert a PlantUML mindmap into Markdown headings.

    The text between ``@startmindmap`` and ``@endmindmap`` is extracted (the
    whole input is used when a marker is absent) and the run of ``*`` level
    markers that starts each line is turned into the same number of ``#``
    characters. Asterisks that appear inside a node's text are left alone so
    that node content is not corrupted.

    Args:
        text: raw text expected to contain a PlantUML mindmap.

    Returns:
        The Markdown outline, stripped of surrounding whitespace.
    """
    import re

    start_phrase = "@startmindmap"
    end_phrase = "@endmindmap"

    # str.split() always yields at least one element, so indexing with
    # [-1] / [0] cannot fail and no IndexError handling is required.
    body = text.split(start_phrase)[-1].split(end_phrase)[0].strip()

    def _to_heading(match):
        # Keep the original indentation, swap each level marker for '#'.
        return match.group(1) + "#" * len(match.group(2))

    return re.sub(r"^([ \t]*)(\*+)", _to_heading, body, flags=re.MULTILINE)
def run(self, question: str, context: str) -> Document: # type: ignore
prompt_template = PromptTemplate(self.prompt_template)
prompt = prompt_template.populate(
@@ -49,4 +89,9 @@ Use the template like this:
HumanMessage(content=prompt),
]
return self.llm(messages)
uml_text = self.llm(messages).text
markdown_text = self.convert_uml_to_markdown(uml_text)
return Document(
text=markdown_text,
)

View File

@@ -15,12 +15,10 @@ class SuggestFollowupQuesPipeline(BaseComponent):
SUGGEST_QUESTIONS_PROMPT_TEMPLATE = (
"Based on the chat history above. "
"your task is to generate 3 to 5 relevant follow-up questions. "
"These questions should be simple, clear, "
"These questions should be simple, very concise, "
"and designed to guide the conversation further. "
"Ensure that the questions are open-ended to encourage detailed responses. "
"Respond in JSON format with 'questions' key. "
"Answer using the language {lang} same as the question. "
"If the question uses Chinese, the answer should be in Chinese.\n"
)
prompt_template: str = SUGGEST_QUESTIONS_PROMPT_TEMPLATE
extra_prompt: str = """Example of valid response:

View File

@@ -1,5 +1,6 @@
import logging
import threading
from textwrap import dedent
from typing import Generator
from ktem.embeddings.manager import embedding_models_manager as embeddings
@@ -8,7 +9,6 @@ from ktem.reasoning.prompt_optimization import (
DecomposeQuestionPipeline,
RewriteQuestionPipeline,
)
from ktem.utils.plantuml import PlantUML
from ktem.utils.render import Render
from ktem.utils.visualize_cited import CreateCitationVizPipeline
from plotly.io import to_json
@@ -165,21 +165,23 @@ class FullQAPipeline(BaseReasoning):
mindmap = answer.metadata["mindmap"]
if mindmap:
mindmap_text = mindmap.text
uml_renderer = PlantUML()
try:
mindmap_svg = uml_renderer.process(mindmap_text)
except Exception as e:
print("Failed to process mindmap:", e)
mindmap_svg = "<svg></svg>"
# post-process the mindmap SVG
mindmap_svg = (
mindmap_svg.replace("sans-serif", "Quicksand, sans-serif")
.replace("#181818", "#cecece")
.replace("background:#FFFFF", "background:none")
.replace("stroke-width:1", "stroke-width:2")
)
mindmap_svg = dedent(
"""
<div class="markmap">
<script type="text/template">
---
markmap:
colorFreezeLevel: 2
activeNode:
placement: center
initialExpandLevel: 4
maxWidth: 200
---
{}
</script>
</div>
"""
).format(mindmap_text)
mindmap_content = Document(
channel="info",
@@ -323,7 +325,7 @@ class FullQAPipeline(BaseReasoning):
def prepare_pipeline_instance(cls, settings, retrievers):
return cls(
retrievers=retrievers,
rewrite_pipeline=RewriteQuestionPipeline(),
rewrite_pipeline=None,
)
@classmethod
@@ -411,8 +413,8 @@ class FullQAPipeline(BaseReasoning):
"value": "highlight",
"component": "radio",
"choices": [
("highlight (verbose)", "highlight"),
("inline (concise)", "inline"),
("citation: highlight", "highlight"),
("citation: inline", "inline"),
("no citation", "off"),
],
},
@@ -433,7 +435,7 @@ class FullQAPipeline(BaseReasoning):
},
"system_prompt": {
"name": "System Prompt",
"value": "This is a question answering system",
"value": ("This is a question answering system."),
},
"qa_prompt": {
"name": "QA Prompt (contains {context}, {question}, {lang})",

View File

@@ -0,0 +1,114 @@
from datetime import datetime, timedelta
import requests
from cachetools import TTLCache, cached
# Hugging Face endpoint listing the daily papers.
HF_API_URL = "https://huggingface.co/api/daily_papers"
# Template for the arXiv abstract page of a paper; filled with `paper_id`.
ARXIV_URL = "https://arxiv.org/abs/{paper_id}"
# Semantic Scholar title-match search; `paper_name` is substituted into the
# query string.
SEMANTIC_SCHOLAR_QUERY_URL = "https://api.semanticscholar.org/graph/v1/paper/search/match?query={paper_name}" # noqa
# Semantic Scholar recommendations endpoint (called with POST below).
SEMANTIC_SCHOLAR_RECOMMEND_URL = (
"https://api.semanticscholar.org/recommendations/v1/papers/" # noqa
)
# Time-to-live, in seconds, of the cached recommendation results.
CACHE_TIME = 60 * 60 * 6 # 6 hours
# Timestamp parsing helper
def parse_date(date_str):
    """Parse a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` timestamp into a naive datetime."""
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
    parsed = datetime.strptime(date_str, timestamp_format)
    return parsed
@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def _fetch_semantic_scholar_recommendations(semantic_scholar_id: str):
    """Query the recommendation endpoint; raise on any failure.

    Only successful responses return normally, so only those are memoised by
    the TTL cache. Errors propagate to the caller and are never cached.
    """
    r = requests.post(
        SEMANTIC_SCHOLAR_RECOMMEND_URL,
        json={
            "positivePaperIds": [semantic_scholar_id],
        },
        params={"fields": "externalIds,title,year", "limit": 14},  # type: ignore
        # never block the caller indefinitely on an unresponsive server
        timeout=10,
    )
    # surface HTTP errors (e.g. rate limiting) instead of parsing error bodies
    r.raise_for_status()
    return r.json()["recommendedPapers"]


def get_recommendations_from_semantic_scholar(semantic_scholar_id: str):
    """Return the papers Semantic Scholar recommends for the given paper id.

    Successful results are cached for ``CACHE_TIME`` seconds. This is a
    best-effort lookup: on a network error, an HTTP error status, an invalid
    JSON body or a response without ``recommendedPapers`` the error is printed
    and an empty list is returned. Such failures are not cached, so the next
    call retries.

    Args:
        semantic_scholar_id: Semantic Scholar paper id used as the positive
            example.

    Returns:
        A list of paper dicts (requested fields: externalIds, title, year),
        or ``[]`` on failure.
    """
    try:
        return _fetch_semantic_scholar_recommendations(semantic_scholar_id)
    except (requests.RequestException, KeyError, ValueError) as e:
        print(e)
        return []
def filter_recommendations(recommendations, max_paper_count=5):
    """Keep only recommendations that have an arXiv id, capped in number.

    Args:
        recommendations: iterable of paper dicts; each may carry an
            ``externalIds`` mapping.
        max_paper_count: maximum number of papers to return.

    Returns:
        The first ``max_paper_count`` papers, in input order, whose
        ``externalIds`` contains a non-null ``ArXiv`` entry.
    """
    # include only arxiv papers; tolerate entries whose `externalIds` is
    # missing or null instead of failing on them
    arxiv_papers = [
        paper
        for paper in recommendations
        if (paper.get("externalIds") or {}).get("ArXiv") is not None
    ]
    # slicing already copes with lists shorter than the cap
    return arxiv_papers[:max_paper_count]
def format_recommendation_into_markdown(recommendations):
    """Render recommendations as a Markdown bullet list under a fixed header.

    Each paper becomes one bullet linking its title to its arXiv abstract
    page, followed by the publication year in parentheses.
    """

    def _bullet(paper):
        arxiv_id = paper["externalIds"]["ArXiv"]
        hub_paper_url = f"https://arxiv.org/abs/{arxiv_id}"
        return f"* [{paper['title']}]({hub_paper_url}) ({paper['year']})\n"

    header = "(recommended by the Semantic Scholar API)\n\n"
    return header + "".join(_bullet(paper) for paper in recommendations)
def get_paper_id_from_name(paper_name):
    """Look up the Semantic Scholar paper id that best matches a title.

    Args:
        paper_name: paper title to search for.

    Returns:
        The ``paperId`` of the first match, or ``None`` when the request
        fails or nothing matches (the error is printed).
    """
    from urllib.parse import quote

    try:
        # Percent-encode the title so characters such as '&', '#', '+' or
        # '?' cannot truncate or alter the query string.
        query_url = SEMANTIC_SCHOLAR_QUERY_URL.format(
            paper_name=quote(paper_name, safe="")
        )
        # bounded wait so a stalled server cannot hang the caller
        response = requests.get(query_url, timeout=10)
        response.raise_for_status()
        items = response.json()
        # an empty result list raises IndexError, handled below
        paper_id = items.get("data", [])[0].get("paperId")
    except Exception as e:
        # best-effort lookup: report the problem and signal "not found"
        print(e)
        return None
    return paper_id
def get_recommended_papers(paper_name):
    """Build the Markdown list of recommended papers for a paper title.

    Returns an empty string when the title cannot be resolved to a paper id.
    """
    paper_id = get_paper_id_from_name(paper_name)
    if paper_id is None:
        return ""

    candidates = get_recommendations_from_semantic_scholar(paper_id)
    return format_recommendation_into_markdown(filter_recommendations(candidates))
def fetch_papers(top_n=5):
    """Fetch the most upvoted Hugging Face daily papers of the last 3 days.

    Args:
        top_n: maximum number of papers to return.

    Returns:
        A list of dicts with ``title``, ``url`` (arXiv abstract page) and
        ``upvotes``, sorted by upvotes in descending order. Returns ``[]``
        when the request or the processing of its payload fails (the error
        is printed).
    """
    from datetime import timezone

    try:
        # bounded wait so a stalled server cannot hang the caller
        response = requests.get(f"{HF_API_URL}?limit=100", timeout=10)
        response.raise_for_status()
        items = response.json()

        # `publishedAt` carries a trailing "Z" (UTC) and parse_date returns
        # it as a naive datetime, so the cutoff must be naive UTC as well.
        # Comparing against local time would skew the window by the host's
        # UTC offset.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        three_days_ago = now_utc - timedelta(days=3)

        # Filter items from the last 3 days
        recent_items = [
            item
            for item in items
            if parse_date(item.get("publishedAt")) >= three_days_ago
        ]

        recent_items.sort(
            key=lambda x: x.get("paper", {}).get("upvotes", 0), reverse=True
        )

        output_items = [
            {
                "title": item.get("paper", {}).get("title"),
                "url": ARXIV_URL.format(paper_id=item.get("paper", {}).get("id")),
                "upvotes": item.get("paper", {}).get("upvotes"),
            }
            for item in recent_items[:top_n]
        ]
    except Exception as e:
        # best-effort: the paper list is optional, so report and return nothing
        print(e)
        return []

    return output_items

View File

@@ -0,0 +1,48 @@
from collections import defaultdict
from datetime import datetime, timedelta
import gradio as gr
from decouple import config
# In-memory store for rate limiting (for demonstration purposes).
# Layout as used by check_rate_limit():
#   rate_limit_store[limit_type][user_id] -> {"count": int, "reset_time": datetime}
rate_limit_store: dict[str, dict] = defaultdict(dict)
# Rate limit configuration
# Maximum number of requests per user and limit type within one period;
# read from the RATE_LIMIT setting, defaulting to 20.
RATE_LIMIT = config("RATE_LIMIT", default=20, cast=int)
# Length of one rate-limit period.
RATE_LIMIT_PERIOD = timedelta(hours=24)
def check_rate_limit(limit_type: str, request: gr.Request):
    """Count one request against the signed-in user's quota.

    The user is identified by the e-mail returned from ``gradiologin`` for the
    given request. Usage is tracked per ``limit_type`` and user in
    ``rate_limit_store``.

    Args:
        limit_type: name of the quota bucket to charge.
        request: the Gradio request of the current call.

    Returns:
        The user id (e-mail) the request was counted for.

    Raises:
        ValueError: if there is no request, no signed-in user could be
            determined, or the user already used ``RATE_LIMIT`` requests in
            the current period.
    """
    if request is None:
        raise ValueError("This feature is not available")

    user_id = None
    try:
        import gradiologin as grlogin

        user = grlogin.get_user(request)
        if user:
            user_id = user.get("email")
    except (ImportError, AssertionError):
        # missing package or failed lookup is handled as "not signed in" below
        pass

    if not user_id:
        raise ValueError("Please sign-in to use this feature")

    now = datetime.now()
    bucket = rate_limit_store[limit_type]
    usage = bucket.get(user_id)

    # Start a fresh window for unknown users and for expired windows.
    if usage is None or now >= usage["reset_time"]:
        usage = {"count": 0, "reset_time": now + RATE_LIMIT_PERIOD}

    if usage["count"] >= RATE_LIMIT:
        raise ValueError("Rate limit exceeded. Please try again later.")

    # Record this request.
    usage["count"] += 1
    bucket[user_id] = usage

    return user_id

View File

@@ -5,7 +5,7 @@ from fast_langdetect import detect
from kotaemon.base import RetrievedDocument
BASE_PATH = os.environ.get("GRADIO_ROOT_PATH", "")
BASE_PATH = os.environ.get("GR_FILE_ROOT_PATH", "")
def is_close(val1, val2, tolerance=1e-9):
@@ -44,7 +44,8 @@ class Render:
o = " open" if open else ""
return (
f"<details class='evidence' {o}><summary>"
f"{header}</summary>{content}</details><br>"
f"{header}</summary>{content}"
"</details><br>"
)
@staticmethod
@@ -225,6 +226,9 @@ class Render:
doc,
highlight_text=highlight_text,
)
rendered_doc_content = (
f"<div class='evidence-content'>{rendered_doc_content}</div>"
)
return Render.collapsible(
header=rendered_header,

View File

@@ -27,6 +27,7 @@ dependencies = [
"sqlmodel>=0.0.16,<0.1",
"tiktoken>=0.6.0,<1",
"gradio>=4.31.0,<5",
"gradiologin",
"python-multipart==0.0.12", # required for gradio, pinning to avoid yanking issues with micropip (fixed in gradio >= 5.4.0)
"markdown>=3.6,<4",
"tzlocal>=5.0",