fix: add optional graphrag toggle in Dockerfile (#377)

* fix: toggle graphrag install in Docker build

* fix: update Dockerfile

* fix: remove unused logic in chat_fn

* fix: disable duckduckgo test due to API limit
This commit is contained in:
Tuan Anh Nguyen Dang (Tadashi_Cin) 2024-10-10 16:09:57 +07:00 committed by GitHub
parent 3ff6af8acf
commit 6da9db489f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 39 additions and 132 deletions

View File

@ -88,16 +88,34 @@ jobs:
username: ${{ github.actor }} username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }} password: ${{ secrets.GITHUB_TOKEN }}
- name: Build docker image - name: Build docker image (amd64)
uses: docker/build-push-action@v6 uses: docker/build-push-action@v6
with: with:
file: Dockerfile file: Dockerfile
context: . context: .
push: true push: true
platforms: linux/amd64,linux/arm64 platforms: linux/amd64
tags: | tags: |
${{ steps.meta.outputs.tags }} ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }} labels: ${{ steps.meta.outputs.labels }}
target: ${{ matrix.target }} target: ${{ matrix.target }}
cache-from: type=gha cache-from: type=gha
cache-to: type=gha,mode=max cache-to: type=gha,mode=max
build-args: |
ENABLE_GRAPHRAG=true
- name: Build docker image (arm64)
uses: docker/build-push-action@v6
with:
file: Dockerfile
context: .
push: true
platforms: linux/arm64
tags: |
${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
target: ${{ matrix.target }}
cache-from: type=gha
cache-to: type=gha,mode=max
build-args: |
ENABLE_GRAPHRAG=false

View File

@ -14,10 +14,14 @@ RUN apt-get update -qqy && \
curl \ curl \
cargo cargo
# Setup args
ARG ENABLE_GRAPHRAG=true
# Set environment variables # Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1 ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=UTF-8 ENV PYTHONIOENCODING=UTF-8
ENV ENABLE_GRAPHRAG=${ENABLE_GRAPHRAG}
# Create working directory # Create working directory
WORKDIR /app WORKDIR /app
@ -30,15 +34,19 @@ RUN bash scripts/download_pdfjs.sh $PDFJS_PREBUILT_DIR
# Copy contents # Copy contents
COPY . /app COPY . /app
COPY .env.example /app/.env
# Install pip packages # Install pip packages
RUN --mount=type=ssh \ RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \ --mount=type=cache,target=/root/.cache/pip \
pip install -e "libs/kotaemon" \ pip install -e "libs/kotaemon" \
&& pip install -e "libs/ktem" \ && pip install -e "libs/ktem" \
&& pip install graphrag future \
&& pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements" && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"
RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \
if [ "$ENABLE_GRAPHRAG" = "true" ]; then pip install graphrag future; fi
# Clean up # Clean up
RUN apt-get autoremove \ RUN apt-get autoremove \
&& apt-get clean \ && apt-get clean \
@ -66,10 +74,6 @@ RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \ --mount=type=cache,target=/root/.cache/pip \
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
# Copy contents
COPY . /app
COPY .env.example /app/.env
# Install additional pip packages # Install additional pip packages
RUN --mount=type=ssh \ RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \ --mount=type=cache,target=/root/.cache/pip \

View File

@ -98,15 +98,15 @@ _openai_chat_completion_responses_react_langchain_tool = [
"Action: wikipedia\n" "Action: wikipedia\n"
"Action Input: Cinnamon AI company\n" "Action Input: Cinnamon AI company\n"
), ),
( # (
"The information retrieved from Wikipedia is not " # "The information retrieved from Wikipedia is not "
"about Cinnamon AI company, but about Blue Prism, " # "about Cinnamon AI company, but about Blue Prism, "
"a British multinational software corporation. " # "a British multinational software corporation. "
"I need to try another source to gather information " # "I need to try another source to gather information "
"about Cinnamon AI company.\n" # "about Cinnamon AI company.\n"
"Action: duckduckgo_search\n" # "Action: duckduckgo_search\n"
"Action Input: Cinnamon AI company\n" # "Action Input: Cinnamon AI company\n"
), # ),
FINAL_RESPONSE_TEXT, FINAL_RESPONSE_TEXT,
] ]
] ]

View File

@ -1,14 +1,10 @@
import asyncio import asyncio
import csv
import json import json
import re import re
from copy import deepcopy from copy import deepcopy
from datetime import datetime
from pathlib import Path
from typing import Optional from typing import Optional
import gradio as gr import gradio as gr
from filelock import FileLock
from ktem.app import BasePage from ktem.app import BasePage
from ktem.components import reasonings from ktem.components import reasonings
from ktem.db.models import Conversation, engine from ktem.db.models import Conversation, engine
@ -269,10 +265,6 @@ class ChatPage(BasePage):
self._suggestion_updated, self._suggestion_updated,
self._app.user_id, self._app.user_id,
], ],
outputs=[
self.chat_control.conversation,
self.chat_control.conversation,
],
show_progress="hidden", show_progress="hidden",
) )
@ -372,10 +364,6 @@ class ChatPage(BasePage):
self._suggestion_updated, self._suggestion_updated,
self._app.user_id, self._app.user_id,
], ],
outputs=[
self.chat_control.conversation,
self.chat_control.conversation,
],
show_progress="hidden", show_progress="hidden",
) )
@ -995,96 +983,3 @@ class ChatPage(BasePage):
pass pass
return suggested_ques, updated return suggested_ques, updated
def backup_original_info(
self, chat_history, settings, info_pannel, original_chat_history
):
original_chat_history.append(chat_history[-1])
return original_chat_history, settings, info_pannel
def save_log(
self,
conversation_id,
chat_history,
settings,
info_panel,
original_chat_history,
original_settings,
original_info_panel,
log_dir,
):
if not Path(log_dir).exists():
Path(log_dir).mkdir(parents=True)
lock = FileLock(Path(log_dir) / ".lock")
# get current date
today = datetime.now()
formatted_date = today.strftime("%d%m%Y_%H")
with Session(engine) as session:
statement = select(Conversation).where(Conversation.id == conversation_id)
result = session.exec(statement).one()
data_source = deepcopy(result.data_source)
likes = data_source.get("likes", [])
if not likes:
return
feedback = likes[-1][-1]
message_index = likes[-1][0]
current_message = chat_history[message_index[0]]
original_message = original_chat_history[message_index[0]]
is_original = all(
[
current_item == original_item
for current_item, original_item in zip(
current_message, original_message
)
]
)
dataframe = [
[
conversation_id,
message_index,
current_message[0],
current_message[1],
chat_history,
settings,
info_panel,
feedback,
is_original,
original_message[1],
original_chat_history,
original_settings,
original_info_panel,
]
]
with lock:
log_file = Path(log_dir) / f"{formatted_date}_log.csv"
is_log_file_exist = log_file.is_file()
with open(log_file, "a") as f:
writer = csv.writer(f)
# write headers
if not is_log_file_exist:
writer.writerow(
[
"Conversation ID",
"Message ID",
"Question",
"Answer",
"Chat History",
"Settings",
"Evidences",
"Feedback",
"Original/ Rewritten",
"Original Answer",
"Original Chat History",
"Original Settings",
"Original Evidences",
]
)
writer.writerows(dataframe)

View File

@ -326,11 +326,7 @@ class ConversationControl(BasePage):
): ):
"""Update the conversation's chat suggestions""" """Update the conversation's chat suggestions"""
if not is_updated: if not is_updated:
return ( return
gr.update(),
conversation_id,
gr.update(visible=False),
)
if user_id is None: if user_id is None:
gr.Warning("Please sign in first (Settings → User Settings)") gr.Warning("Please sign in first (Settings → User Settings)")
@ -353,13 +349,7 @@ class ConversationControl(BasePage):
session.add(result) session.add(result)
session.commit() session.commit()
history = self.load_chat_history(user_id)
gr.Info("Chat suggestions updated.") gr.Info("Chat suggestions updated.")
return (
gr.update(choices=history),
conversation_id,
gr.update(visible=False),
)
def _on_app_created(self): def _on_app_created(self):
"""Reload the conversation once the app is created""" """Reload the conversation once the app is created"""