diff --git a/.github/workflows/build-push-docker.yaml b/.github/workflows/build-push-docker.yaml index 26a214e..4b195e1 100644 --- a/.github/workflows/build-push-docker.yaml +++ b/.github/workflows/build-push-docker.yaml @@ -88,16 +88,34 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Build docker image + - name: Build docker image (amd64) uses: docker/build-push-action@v6 with: file: Dockerfile context: . push: true - platforms: linux/amd64,linux/arm64 + platforms: linux/amd64 tags: | ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} target: ${{ matrix.target }} cache-from: type=gha cache-to: type=gha,mode=max + build-args: | + ENABLE_GRAPHRAG=true + + - name: Build docker image (arm64) + uses: docker/build-push-action@v6 + with: + file: Dockerfile + context: . + push: true + platforms: linux/arm64 + tags: | + ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + target: ${{ matrix.target }} + cache-from: type=gha + cache-to: type=gha,mode=max + build-args: | + ENABLE_GRAPHRAG=false diff --git a/Dockerfile b/Dockerfile index 31f64e7..fb6d499 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,10 +14,14 @@ RUN apt-get update -qqy && \ curl \ cargo +# Setup args +ARG ENABLE_GRAPHRAG=true + # Set environment variables ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONUNBUFFERED=1 ENV PYTHONIOENCODING=UTF-8 +ENV ENABLE_GRAPHRAG=${ENABLE_GRAPHRAG} # Create working directory WORKDIR /app @@ -30,15 +34,19 @@ RUN bash scripts/download_pdfjs.sh $PDFJS_PREBUILT_DIR # Copy contents COPY . /app +COPY .env.example /app/.env # Install pip packages RUN --mount=type=ssh \ --mount=type=cache,target=/root/.cache/pip \ pip install -e "libs/kotaemon" \ && pip install -e "libs/ktem" \ - && pip install graphrag future \ && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements" +RUN --mount=type=ssh \ + --mount=type=cache,target=/root/.cache/pip \ + if [ "$ENABLE_GRAPHRAG" = "true" ]; then pip install graphrag future; fi + # Clean up RUN apt-get autoremove \ && apt-get clean \ @@ -66,10 +74,6 @@ RUN --mount=type=ssh \ --mount=type=cache,target=/root/.cache/pip \ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu -# Copy contents -COPY . /app -COPY .env.example /app/.env - # Install additional pip packages RUN --mount=type=ssh \ --mount=type=cache,target=/root/.cache/pip \ diff --git a/libs/kotaemon/tests/test_agent.py b/libs/kotaemon/tests/test_agent.py index 5fcf84b..1a4d91c 100644 --- a/libs/kotaemon/tests/test_agent.py +++ b/libs/kotaemon/tests/test_agent.py @@ -98,15 +98,15 @@ _openai_chat_completion_responses_react_langchain_tool = [ "Action: wikipedia\n" "Action Input: Cinnamon AI company\n" ), - ( - "The information retrieved from Wikipedia is not " - "about Cinnamon AI company, but about Blue Prism, " - "a British multinational software corporation. " - "I need to try another source to gather information " - "about Cinnamon AI company.\n" - "Action: duckduckgo_search\n" - "Action Input: Cinnamon AI company\n" - ), + # ( + # "The information retrieved from Wikipedia is not " + # "about Cinnamon AI company, but about Blue Prism, " + # "a British multinational software corporation. " + # "I need to try another source to gather information " + # "about Cinnamon AI company.\n" + # "Action: duckduckgo_search\n" + # "Action Input: Cinnamon AI company\n" + # ), FINAL_RESPONSE_TEXT, ] ] diff --git a/libs/ktem/ktem/pages/chat/__init__.py b/libs/ktem/ktem/pages/chat/__init__.py index a54ff9f..8b7e04c 100644 --- a/libs/ktem/ktem/pages/chat/__init__.py +++ b/libs/ktem/ktem/pages/chat/__init__.py @@ -1,14 +1,10 @@ import asyncio -import csv import json import re from copy import deepcopy -from datetime import datetime -from pathlib import Path from typing import Optional import gradio as gr -from filelock import FileLock from ktem.app import BasePage from ktem.components import reasonings from ktem.db.models import Conversation, engine @@ -269,10 +265,6 @@ class ChatPage(BasePage): self._suggestion_updated, self._app.user_id, ], - outputs=[ - self.chat_control.conversation, - self.chat_control.conversation, - ], show_progress="hidden", ) @@ -372,10 +364,6 @@ class ChatPage(BasePage): self._suggestion_updated, self._app.user_id, ], - outputs=[ - self.chat_control.conversation, - self.chat_control.conversation, - ], show_progress="hidden", ) @@ -995,96 +983,3 @@ class ChatPage(BasePage): pass return suggested_ques, updated - - def backup_original_info( - self, chat_history, settings, info_pannel, original_chat_history - ): - original_chat_history.append(chat_history[-1]) - return original_chat_history, settings, info_pannel - - def save_log( - self, - conversation_id, - chat_history, - settings, - info_panel, - original_chat_history, - original_settings, - original_info_panel, - log_dir, - ): - if not Path(log_dir).exists(): - Path(log_dir).mkdir(parents=True) - - lock = FileLock(Path(log_dir) / ".lock") - # get current date - today = datetime.now() - formatted_date = today.strftime("%d%m%Y_%H") - - with Session(engine) as session: - statement = select(Conversation).where(Conversation.id == conversation_id) - result = session.exec(statement).one() - - data_source = deepcopy(result.data_source) - likes = data_source.get("likes", []) - if not likes: - return - - feedback = likes[-1][-1] - message_index = likes[-1][0] - - current_message = chat_history[message_index[0]] - original_message = original_chat_history[message_index[0]] - is_original = all( - [ - current_item == original_item - for current_item, original_item in zip( - current_message, original_message - ) - ] - ) - - dataframe = [ - [ - conversation_id, - message_index, - current_message[0], - current_message[1], - chat_history, - settings, - info_panel, - feedback, - is_original, - original_message[1], - original_chat_history, - original_settings, - original_info_panel, - ] - ] - - with lock: - log_file = Path(log_dir) / f"{formatted_date}_log.csv" - is_log_file_exist = log_file.is_file() - with open(log_file, "a") as f: - writer = csv.writer(f) - # write headers - if not is_log_file_exist: - writer.writerow( - [ - "Conversation ID", - "Message ID", - "Question", - "Answer", - "Chat History", - "Settings", - "Evidences", - "Feedback", - "Original/ Rewritten", - "Original Answer", - "Original Chat History", - "Original Settings", - "Original Evidences", - ] - ) - - writer.writerows(dataframe) diff --git a/libs/ktem/ktem/pages/chat/control.py b/libs/ktem/ktem/pages/chat/control.py index 044db3c..989c8ec 100644 --- a/libs/ktem/ktem/pages/chat/control.py +++ b/libs/ktem/ktem/pages/chat/control.py @@ -326,11 +326,7 @@ class ConversationControl(BasePage): ): """Update the conversation's chat suggestions""" if not is_updated: - return ( - gr.update(), - conversation_id, - gr.update(visible=False), - ) + return if user_id is None: gr.Warning("Please sign in first (Settings → User Settings)") @@ -353,13 +349,7 @@ class ConversationControl(BasePage): session.add(result) session.commit() - history = self.load_chat_history(user_id) gr.Info("Chat suggestions updated.") - return ( - gr.update(choices=history), - conversation_id, - gr.update(visible=False), - ) def _on_app_created(self): """Reload the conversation once the app is created"""